| DFSMState.java |
1 /*
2 * DFSMState.java
3 *
4 * Copyright (c) 1998-2004, The University of Sheffield.
5 *
6 * This file is part of GATE (see http://gate.ac.uk/), and is free
7 * software, licenced under the GNU Library General Public License,
8 * Version 2, June 1991 (in the distribution as file licence.html,
9 * and also available at http://gate.ac.uk/gate/licence.html).
10 *
11 * Valentin Tablan, 27/06/2000
12 *
13 * $Id: DFSMState.java,v 1.20 2004/07/21 17:10:06 akshay Exp $
14 */
15
16 /*
17 modified by OntoText, Aug 29
18
19 */
20
21 package gate.creole.tokeniser;
22
23 import java.util.*;
24
25 /** Implements a state of the deterministic finite state machine of the
26 * tokeniser.
27 * It differs from {@link FSMState FSMState} by the definition of the
28 * transition function which in this case maps character types to other states
29 * as oposed to the transition function from FSMState which maps character
30 * types to sets of states, hence the nondeterministic character.
31 * @see FSMState
32 */
33 class DFSMState implements java.io.Serializable { //extends FSMState{
34
35 /** Debug flag */
36 private static final boolean DEBUG = false;
37
38 /** Constructs a new DFSMState object and adds it to the list of deterministic
39 * states of the {@link DefaultTokeniser DefaultTokeniser} provided as owner.
40 * @param owner a {@link DefaultTokeniser DefaultTokeniser} object
41 */
42 public DFSMState(SimpleTokeniser owner){
43 myIndex = index++;
44 owner.dfsmStates.add(this);
45 }
46
47 /** Adds a new mapping in the transition function of this state
48 * @param type the UnicodeType for this mapping
49 * @param state the next state of the FSM Machine when a character of type type
50 * is read from the input.
51 */
52 void put(UnicodeType type, DFSMState state){
53 put(type.type, state);
54 } // put(UnicodeType type, DFSMState state)
55
56 /** Adds a new mapping using the actual index in the internal array.
57 * This method is for internal use only. Use
58 * {@link #put(gate.creole.tokeniser.UnicodeType,
59 * gate.creole.tokeniser.DFSMState)} instead.
60 */
61 void put(int index, DFSMState state){
62 transitionFunction[index] = state;
63 } // put(int index, DFSMState state)
64
65 /** This method is used to access the transition function of this state.
66 * @param type the Unicode type identifier as the corresponding static value
67 * on {@link java.lang.Character}
68 */
69 DFSMState next(int type){//UnicodeType type){
70 return transitionFunction[type];
71 } // next
72
73 /** Returns a GML (Graph Modelling Language) representation of the edges
74 * emerging from this state
75 */
76 String getEdgesGML(){
77 ///String res = "";
78 //OT
79 StringBuffer res = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
80 Set nextSet;
81 Iterator nextSetIter;
82 DFSMState nextState;
83
84 for(int i = 0; i< transitionFunction.length; i++){
85 nextState = transitionFunction[i];
86 if(null != nextState){
87 /*
88 res += "edge [ source " + myIndex +
89 " target " + nextState.getIndex() +
90 " label \"";
91 res += SimpleTokeniser.typeMnemonics[i];
92 res += "\" ]\n";
93 */
94 //OT
95 res.append("edge [ source ");
96 res.append(myIndex);
97 res.append(" target ");
98 res.append(nextState.getIndex());
99 res.append(" label \"");
100 res.append(SimpleTokeniser.typeMnemonics[i]);
101 res.append("\" ]\n");
102 }
103 };
104 return res.toString();
105 } // getEdgesGML
106
107 /** Builds the token description for the token that will be generated when
108 * this <b>final</b> state will be reached and the action associated with it
109 * will be fired.
110 * See also {@link #setRhs(String)}.
111 */
112 void buildTokenDesc() throws TokeniserException{
113 String ignorables = " \t\f";
114 String token = null,
115 type = null,
116 attribute = null,
117 value = null
118 ///prefix = null,
119 ///read =""
120 ;
121 //OT
122 StringBuffer prefix = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
123 StringBuffer read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
124
125 LinkedList attributes = new LinkedList(),
126 values = new LinkedList();
127 StringTokenizer mainSt =
128 new StringTokenizer(rhs, ignorables + "\\\";=", true);
129
130 int descIndex = 0;
131 //phase means:
132 //0 == looking for type;
133 //1 == looking for attribute;
134 //2 == looking for value;
135 //3 == write the attr/value pair
136 int phase = 0;
137
138 while(mainSt.hasMoreTokens()) {
139 token = SimpleTokeniser.skipIgnoreTokens(mainSt);
140
141 if(token.equals("\\")){
142 if(null == prefix)
143 ///prefix = mainSt.nextToken();
144 //OT
145 prefix = new StringBuffer(mainSt.nextToken());
146 else ///prefix += mainSt.nextToken();
147 //OT
148 prefix.append(mainSt.nextToken());
149 continue;
150 } else if(null != prefix) {
151 ///read += prefix;
152 //OT
153 read.append(prefix.toString());
154 prefix = null;
155 }
156
157 if(token.equals("\"")){
158 ///read = mainSt.nextToken("\"");
159 //OT
160 read = new StringBuffer(mainSt.nextToken("\""));
161 if(read.equals("\"")) ///read = "";
162 read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
163 else {
164 //delete the remaining enclosing quote and restore the delimiters
165 mainSt.nextToken(ignorables + "\\\";=");
166 }
167
168 } else if(token.equals("=")) {
169
170 if(phase == 1){
171 ///attribute = read;
172 //OT
173 attribute = read.toString();
174 ///read = "";
175 //OT
176 read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
177 phase = 2;
178 }else throw new TokeniserException("Invalid attribute format: " +
179 read);
180 } else if(token.equals(";")) {
181 if(phase == 0){
182 ///type = read;
183 type = read.toString();
184 ///read = "";
185 read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
186 //Out.print("Type: " + type);
187 attributes.addLast(type);
188 values.addLast("");
189 phase = 1;
190 } else if(phase == 2) {
191 ///value = read;
192 value = read.toString();
193 ///read = "";
194 read = new StringBuffer(gate.Gate.STRINGBUFFER_SIZE);
195 phase = 3;
196 } else throw new TokeniserException("Invalid value format: " +
197 read);
198 } else ///read += token;
199 read.append(token);
200
201 if(phase == 3) {
202 // Out.print("; " + attribute + "=" + value);
203 attributes.addLast(attribute);
204 values.addLast(value);
205 phase = 1;
206 }
207 }
208 //Out.println();
209 if(attributes.size() < 1)
210 throw new InvalidRuleException("Invalid right hand side " + rhs);
211 tokenDesc = new String[attributes.size()][2];
212
213 for(int i = 0; i < attributes.size(); i++) {
214 tokenDesc[i][0] = (String)attributes.get(i);
215 tokenDesc[i][1] = (String)values.get(i);
216 }
217
218 // for(int i = 0; i < attributes.size(); i++){
219 // Out.println(tokenDesc[i][0] + "=" +
220 // tokenDesc[i][1]);
221 // }
222 } // buildTokenDesc
223
224 /** Sets the right hand side associated with this state. The RHS is
225 * represented as a string value that will be parsed by the
226 * {@link #buildTokenDesc()} method being converted in a table of strings
227 * with 2 columns and as many lines as necessary.
228 * @param rhs the RHS string
229 */
230 void setRhs(String rhs) { this.rhs = rhs; }
231
232 /** Returns the RHS string*/
233 String getRhs(){return rhs;}
234
235 /** Checks whether this state is a final one*/
236 boolean isFinal() { return (null != rhs); }
237
238 /** Returns the unique ID of this state.*/
239 int getIndex() { return myIndex; }
240
241 /** Returns the token description associated with this state. This description
242 * is built by {@link #buildTokenDesc()} method and consists of a table of
243 * strings having two columns.
244 * The first line of the table contains the annotation type on the first
245 * position and nothing on the second.
246 * Each line after the first one contains a attribute on the first position
247 * and its associated value on the second.
248 */
249 String[][] getTokenDesc() {
250 return tokenDesc;
251 }
252
253 /** A table of strings describing an annotation.
254 * The first line of the table contains the annotation type on the first
255 * position and nothing on the second.
256 * Each line after the first one contains a attribute on the first position
257 * and its associated value on the second.
258 */
259 String[][] tokenDesc;
260
261 /** The transition function of this state.
262 */
263 DFSMState[] transitionFunction = new DFSMState[SimpleTokeniser.maxTypeId];
264
265 /** The string of the RHS of the rule from which the token
266 * description is built
267 */
268 String rhs;
269
270 /** The unique index of this state*/
271 int myIndex;
272
273 /** Used to generate unique indices for all the objects of this class*/
274 static int index;
275
276 static {
277 index = 0;
278 }
279
280 } // class DFSMState
281