Scanner Skeleton
note
description: "General lexical analyzers"
library: "Gobo Eiffel Lexical Library"
copyright: "Copyright (c) 2001-2019, Eric Bezault and others"
license: "MIT License"
deferred class YY_SCANNER
create
make
-- Create a new scanner with
-- standard input as input file.
make_with_file (a_file: KI_CHARACTER_INPUT_STREAM)
-- Create a new scanner with
-- a_file as input file.
-- To be used when a_file contains ISO-8859-1 characters,
-- or when it is using the UTF-8 encoding and the scanner is
-- either using the "%option utf8" or has been manually written
-- to expect sequences of UTF-8 bytes.
require
a_file_not_void: a_file /= Void
a_file_open_read: a_file.is_open_read
make_with_unicode_file (a_file: KI_CHARACTER_INPUT_STREAM)
-- Create a new scanner with a_file as input file.
-- a_file is expected to be encoded in UTF-8
-- or ISO-8859-1, and the input buffer will handle
-- the corresponding Unicode characters.
require
a_file_not_void: a_file /= Void
a_file_open_read: a_file.is_open_read
make_with_buffer (a_buffer: like input_buffer)
-- Create a new scanner with
-- a_buffer as input buffer.
require
a_buffer_not_void: a_buffer /= Void
ensure
input_buffer_set: input_buffer = a_buffer
feature -- Initialization
reset
-- Reset scanner before scanning next input source.
-- (This routine can be called in wrap before scanning
-- another input buffer.)
reset_with_file (a_file: KI_CHARACTER_INPUT_STREAM)
-- Reset scanner before scanning next input source.
-- Then reuse input_buffer and set it to a_file
-- if it was a file buffer; otherwise create a new
-- file input buffer with a_file.
require
a_file_not_void: a_file /= Void
a_file_open_read: a_file.is_open_read
reset_with_string (a_string: STRING_8)
-- Reset scanner before scanning next input source.
-- Then reuse input_buffer and set it to a_string
-- if it was not Empty_buffer; otherwise create a new
-- input buffer with a_string.
require
a_string_not_void: a_string /= Void
reset_start_condition
-- Clear pushed start conditions and set start_condition
-- to the "INITIAL" start condition.
ensure
pushed_start_conditions_cleared: pushed_start_condition_count = 0
feature -- Access
last_token: INTEGER
-- Code of last token read
-- (0 means that the end-of-input has been reached,
-- negative values mean that an error occurred;
-- see header-comment of scanning_error.)
text: STRING_8
-- Text of last token read
-- (Create a new string at each call.)
--
-- Note that if input_buffer contains Unicode characters
-- which cannot be represented as 8-bit characters, they
-- will be replaced by a replacement character specified
-- in the buffer.
ensure
text_not_void: Result /= Void
correct_count: Result.count = text_count
unicode_text: STRING_32
-- Unicode text of last token read
-- (Create a new string at each call.)
--
-- Note that if the scanner is written to receive sequences
-- of UTF-8 bytes, unicode_text will treat each single
-- byte as a character. It will not try to decode the UTF-8 bytes
-- into Unicode characters.
--
-- Note that unicode_text does not contain surrogate
-- or invalid Unicode characters.
ensure
unicode_text_not_void: Result /= Void
correct_count: Result.count = text_count
utf8_text: STRING_8
-- UTF-8 representation of last token read
-- (Create a new string at each call.)
--
-- Note that unicode_text does not contain surrogate
-- or invalid Unicode characters, therefore the resulting
-- string is valid UTF-8.
ensure
utf8_text_not_void: Result /= Void
utf8_text_is_string_8: Result.same_type ({STRING_8} "")
valid_utf8: {UC_UTF8_ROUTINES}.valid_utf8 (Result)
correct_count: Result.count = {UC_UTF8_ROUTINES}.string_byte_count (unicode_text)
definition: Result.is_equal ({UC_UTF8_ROUTINES}.string_to_utf8 (unicode_text))
text_item (i: INTEGER): CHARACTER_8
-- i-th character of last token read
require
i_large_enough: i >= 1
i_small_enough: i <= text_count
ensure
definition: Result = text.item (i)
unicode_text_item (i: INTEGER): CHARACTER_32
-- i-th Unicode character of last token read
--
-- Note that unicode_text does not contain surrogate
-- or invalid Unicode characters.
require
i_large_enough: i >= 1
i_small_enough: i <= text_count
ensure
definition: Result = unicode_text.item (i)
text_substring (s, e: INTEGER): STRING_8
-- Substring of last token read
-- (Create a new string at each call.)
-- (For efficiency reasons, this function can bypass the
-- call to text and create the substring directly from
-- the input buffer.)
require
meaningful_start: 1 <= s
meaningful_interval: s <= e + 1
meaningful_end: e <= text_count
ensure
text_substring_not_void: Result /= Void
text_substring_empty: (s > e) implies Result.is_empty
definition: Result.is_equal (text.substring (s, e))
unicode_text_substring (s, e: INTEGER): STRING_32
-- Unicode substring of last token read
-- (Create a new string at each call.)
-- (For efficiency reasons, this function can bypass the
-- call to unicode_text and create the substring directly from
-- the input buffer.)
--
-- Note that unicode_text does not contain surrogate
-- or invalid Unicode characters.
require
meaningful_start: 1 <= s
meaningful_interval: s <= e + 1
meaningful_end: e <= text_count
ensure
unicode_text_substring_not_void: Result /= Void
unicode_text_substring_empty: (s > e) implies Result.is_empty
definition: Result.is_equal (unicode_text.substring (s, e))
utf8_text_substring (s, e: INTEGER): STRING_8
-- UTF-8 representation of substring of last token read
-- (Create a new string at each call.)
-- (For efficiency reasons, this function can bypass the
-- call to unicode_text and create the substring directly from
-- the input buffer.)
--
-- Note that unicode_text does not contain surrogate
-- or invalid Unicode characters, therefore the resulting
-- string is valid UTF-8.
require
meaningful_start: 1 <= s
meaningful_interval: s <= e + 1
meaningful_end: e <= text_count
ensure
utf8_text_not_void: Result /= Void
utf8_text_is_string_8: Result.same_type ({STRING_8} "")
valid_utf8: {UC_UTF8_ROUTINES}.valid_utf8 (Result)
utf8_text_substring_empty: (s > e) implies Result.is_empty
definition: Result.is_equal ({UC_UTF8_ROUTINES}.string_to_utf8 (unicode_text.substring (s, e)))
correct_count: Result.count = {UC_UTF8_ROUTINES}.string_byte_count (unicode_text.substring (s, e))
start_condition: INTEGER
-- Start condition
feature -- Measurement
text_count: INTEGER
-- Length of last token read
ensure
text_count_not_negative: Result >= 0
line: INTEGER
-- Line number of last token read when
-- '%option line' has been specified
ensure
line_positive: Result >= 1
column: INTEGER
-- Column number of last token read when
-- '%option line' has been specified
ensure
column_positive: Result >= 1
position: INTEGER
-- Position of last token read (i.e. number of
-- characters from the start of the input source)
-- when '%option position' has been specified
ensure
position_positive: Result >= 1
pushed_start_condition_count: INTEGER
-- Number of start conditions already pushed (and not popped yet)
ensure
pushed_start_condition_count_not_negative: Result >= 0
feature -- Status report
end_of_file: BOOLEAN
-- Has the end of input buffer been reached?
-- This means that last_token has been set
-- to 0 indicating "all done".
scanning_error: BOOLEAN
-- Has an error occurred during scanning?
-- This can occur when reject has been called too many times (and
-- hence nothing can be matched anymore) or when the option "nodefault"
-- (or option -s) has been specified but the default rule is
-- matched nevertheless.
valid_start_condition (sc: INTEGER): BOOLEAN
-- Is sc a valid start condition?
feature -- Setting
set_last_token (a_token: INTEGER)
-- Set last_token to a_token.
ensure
last_token_set: last_token = a_token
set_start_condition (a_start_condition: INTEGER)
-- Set start_condition to a_start_condition.
require
valid_start_condition: valid_start_condition (a_start_condition)
ensure
start_condition_set: start_condition = a_start_condition
push_start_condition (a_start_condition: INTEGER)
-- Set start condition and add previous to stack.
require
valid_start_condition: valid_start_condition (a_start_condition)
ensure
start_condition_set: start_condition = a_start_condition
one_more: pushed_start_condition_count = old pushed_start_condition_count + 1
pop_start_condition
-- Restore previous start condition.
require
has_pushed_start_conditions: pushed_start_condition_count > 0
ensure
one_less: pushed_start_condition_count = old pushed_start_condition_count - 1
feature -- Scanning
scan
-- Scan input_buffer until end of file is found
-- or an error occurs.
ensure
end_of_file: not scanning_error implies end_of_file
read_token
-- Read a token from input_buffer.
-- Make result available in last_token.
feature -- Element change
append_text_to_string (a_string: STRING_8)
-- Append text at end of a_string.
-- (For efficiency reasons, this feature can bypass the
-- call to text and directly copy the characters from
-- the input buffer.)
require
a_string_not_void: a_string /= Void
ensure
count_set: a_string.count = old (a_string.count) + text_count
definition: a_string.substring (old (a_string.count) + 1, a_string.count).same_string (text)
append_unicode_text_to_string (a_string: STRING_32)
-- Append unicode_text at end of a_string.
-- (For efficiency reasons, this feature can bypass the
-- call to unicode_text and directly copy the characters from
-- the input buffer.)
require
a_string_not_void: a_string /= Void
ensure
count_set: a_string.count = old (a_string.count) + text_count
definition: a_string.substring (old (a_string.count) + 1, a_string.count).same_string (unicode_text)
append_utf8_text_to_string (a_string: STRING_8)
-- Append utf8_text at end of a_string.
-- (For efficiency reasons, this feature can bypass the
-- call to utf8_text and directly copy the characters from
-- the input buffer.)
require
a_string_not_void: a_string /= Void
a_string_is_string_8: a_string.same_type ({STRING_8} "")
ensure
count_set: a_string.count = old (a_string.count) + utf8_text.count
definition: a_string.substring (old (a_string.count) + 1, a_string.count).same_string (utf8_text)
append_text_substring_to_string (s, e: INTEGER; a_string: STRING_8)
-- Append text_substring at end of a_string.
-- (For efficiency reasons, this feature can bypass
-- the call to text_substring and directly copy
-- the characters from the input buffer.)
require
a_string_not_void: a_string /= Void
s_large_enough: 1 <= s
valid_interval: s <= e + 1
e_small_enough: e <= text_count
ensure
count_set: a_string.count = old (a_string.count) + (e - s + 1)
definition: a_string.substring (old (a_string.count) + 1, a_string.count).same_string (text_substring (s, e))
append_unicode_text_substring_to_string (s, e: INTEGER; a_string: STRING_32)
-- Append unicode_text_substring at end of a_string.
-- (For efficiency reasons, this feature can bypass
-- the call to unicode_text_substring and directly copy
-- the characters from the input buffer.)
require
a_string_not_void: a_string /= Void
s_large_enough: 1 <= s
valid_interval: s <= e + 1
e_small_enough: e <= text_count
ensure
count_set: a_string.count = old (a_string.count) + (e - s + 1)
definition: a_string.substring (old (a_string.count) + 1, a_string.count).same_string (unicode_text_substring (s, e))
append_utf8_text_substring_to_string (s, e: INTEGER; a_string: STRING_8)
-- Append utf8_text_substring at end of a_string.
-- (For efficiency reasons, this feature can bypass the
-- call to utf8_text_substring and directly copy the characters from
-- the input buffer.)
require
a_string_not_void: a_string /= Void
a_string_is_string_8: a_string.same_type ({STRING_8} "")
s_large_enough: 1 <= s
valid_interval: s <= e + 1
e_small_enough: e <= text_count
ensure
count_set: a_string.count = old (a_string.count) + utf8_text_substring (s, e).count
definition: a_string.substring (old (a_string.count) + 1, a_string.count).same_string (utf8_text_substring (s, e))
terminate
-- Terminate scanner and set last_token
-- to 0 indicating "all done".
wrap: BOOLEAN
-- Should current scanner terminate when end of file is reached?
-- This function can be redefined to switch to another input
-- buffer (but don't forget to update start_condition).
-- (Default: True.)
more
-- Tell scanner to append the next matched token
-- to current value of text instead of
-- replacing it.
less (n: INTEGER)
-- Return all but the first n matched
-- characters back to input_buffer.
require
n_large_enough: n >= 0
n_small_enough: n <= text_count
ensure
text_count_set: text_count = n
unread_character (c: CHARACTER_8)
-- Put c back to input_buffer. This will alter both
-- text and the content of input_buffer.
unread_unicode_character (c: CHARACTER_32)
-- Put c back to input_buffer. This will alter both
-- unicode_text and the content of input_buffer.
-- The behavior is undefined if c is too large to fit into input_buffer.
read_character
-- Read a character from input_buffer.
-- Make result available in last_character and last_unicode_character.
--
-- Note that if input_buffer contains Unicode characters
-- which cannot be represented as 8-bit characters, they
-- will be replaced by a replacement character specified
-- in the buffer.
last_character: CHARACTER_8
-- Last character read by read_character
last_unicode_character: CHARACTER_32
-- Last Unicode character read by read_character
feature -- Input
input_buffer: YY_BUFFER
-- Input buffer
set_input_buffer (a_buffer: like input_buffer)
-- Set input_buffer to a_buffer.
require
a_buffer_not_void: a_buffer /= Void
ensure
input_buffer_set: input_buffer = a_buffer
flush_input_buffer
-- Flush input_buffer. input_buffer will be automatically
-- refilled unless end of file has been found.
ensure
flushed: input_buffer.count = 0
new_file_buffer (a_file: KI_CHARACTER_INPUT_STREAM): YY_FILE_BUFFER
-- New input buffer for a_file.
-- To be used when a_file contains ISO-8859-1 characters,
-- or when it is using the UTF-8 encoding and the scanner is
-- either using the "%option utf8" or has been manually written
-- to expect sequences of UTF-8 bytes.
require
a_file_not_void: a_file /= Void
a_file_open_read: a_file.is_open_read
ensure
new_buffer_not_void: Result /= Void
new_unicode_file_buffer (a_file: KI_CHARACTER_INPUT_STREAM): YY_UNICODE_FILE_BUFFER
-- New Unicode input buffer for a_file.
-- a_file is expected to be encoded in UTF-8
-- or ISO-8859-1.
-- The scanner will receive Unicode characters,
-- not sequences of UTF-8 bytes.
require
a_file_not_void: a_file /= Void
a_file_open_read: a_file.is_open_read
ensure
new_unicode_file_buffer_not_void: Result /= Void
new_utf8_file_buffer (a_file: KI_CHARACTER_INPUT_STREAM): YY_UTF8_FILE_BUFFER
-- New UTF-8 input buffer for a_file.
-- To be used when a_file contains ISO-8859-1 characters or when it
-- is using the UTF-8 encoding, and the scanner is either using the
-- "%option utf8" or has been manually written to expect sequences
-- of UTF-8 bytes.
-- The scanner will receive sequences of UTF-8 bytes.
require
a_file_not_void: a_file /= Void
a_file_open_read: a_file.is_open_read
ensure
new_utf8_file_buffer_not_void: Result /= Void
new_string_buffer (a_string: STRING_8): YY_BUFFER
-- New input buffer for a_string.
-- To be used when a_string contains ISO-8859-1 characters,
-- or when it is using the UTF-8 encoding and the scanner is
-- either using the "%option utf8" or has been manually written
-- to expect sequences of UTF-8 bytes.
require
a_string_not_void: a_string /= Void
a_string_is_string: a_string.same_type ({STRING_8} "")
ensure
new_buffer_not_void: Result /= Void
new_unicode_string_buffer (a_string: READABLE_STRING_GENERAL): YY_UNICODE_BUFFER
-- New Unicode input buffer for a_string.
-- To be used when a_string contains ISO-8859-1 or Unicode characters.
-- The scanner will receive Unicode characters, not sequences of UTF-8 bytes.
require
a_string_not_void: a_string /= Void
ensure
new_unicode_string_buffer_not_void: Result /= Void
new_utf8_string_buffer (a_string: READABLE_STRING_GENERAL): YY_UTF8_BUFFER
-- New UTF-8 input buffer for a_string.
-- To be used when a_string contains ISO-8859-1 or Unicode characters,
-- and the scanner is either using the "%option utf8" or has been
-- manually written to expect sequences of UTF-8 bytes.
-- a_string is expected to contain valid non-surrogate Unicode
-- characters. Invalid or surrogate Unicode characters are encoded
-- with one byte 0xFF (which is an invalid byte in UTF-8).
-- The scanner will receive sequences of UTF-8 bytes.
require
a_string_not_void: a_string /= Void
ensure
new_utf8_string_buffer_not_void: Result /= Void
Empty_buffer: YY_BUFFER
-- Empty input buffer
ensure
empty_buffer_not_void: Result /= Void
feature -- Output
output (a_text: like text)
-- Output a_text.
-- (Note: this routine can be redefined in descendant
-- classes. Default: print a_text to standard output.)
require
a_text_not_void: a_text /= Void
echo
-- Output text using feature output.
feature -- Action
pre_action
-- Action executed before every semantic action
-- when '%option pre-action' has been specified.
-- (Note: this routine can be redefined in descendant
-- classes. Default: do nothing.)
post_action
-- Action executed after every semantic action
-- when '%option post-action' has been specified.
-- (Note: this routine can be redefined in descendant
-- classes. Default: do nothing.)
pre_eof_action
-- Action executed before every end-of-file semantic action
-- (i.e. <<EOF>>) when '%option pre-eof-action' has been specified.
-- (Note: this routine can be redefined in descendant classes.
-- Default: do nothing.)
post_eof_action
-- Action executed after every end-of-file semantic action
-- (i.e. <<EOF>>) when '%option post-eof-action' has been specified.
-- (Note: this routine can be redefined in descendant classes.
-- Default: do nothing.)
default_action
-- Action executed when default rule is matched.
-- (Note: this routine can be redefined in descendant classes.
-- Default: print last character read to standard output.)
feature -- Error handling
fatal_error (a_message: STRING_8)
-- A fatal error occurred.
-- Print a_message.
require
a_message_not_void: a_message /= Void
report_invalid_unicode_character_error (a_code: NATURAL_32)
-- Report that the surrogate or invalid Unicode character
-- with code a_code has been read from the input
-- buffer and caused the scanner to fail.
feature -- Debugging
print_last_token
-- Print to standard error debug information
-- about the last token read. Can be redefined
-- in descendant classes to print more information.
-- (Called at the end of read_token when compiled
-- with 'debug ("GELEX")' enabled).
invariant
input_buffer_not_void: input_buffer /= Void
valid_start_condition: valid_start_condition (start_condition)
end
Copyright © 2001-2019, Eric Bezault and others
mailto:ericb@gobosoft.com
http://www.gobosoft.com
Last Updated: 28 September 2019