| ConstraintGroup.java |
1 /*
2 * ConstraintGroup.java - transducer class
3 *
4 * Copyright (c) 1998-2004, The University of Sheffield.
5 *
6 * This file is part of GATE (see http://gate.ac.uk/), and is free
7 * software, licenced under the GNU Library General Public License,
8 * Version 2, June 1991 (in the distribution as file licence.html,
9 * and also available at http://gate.ac.uk/gate/licence.html).
10 *
11 * Hamish Cunningham, 24/07/98
12 *
13 * $Id: ConstraintGroup.java,v 1.10 2004/07/21 17:10:07 akshay Exp $
14 */
15
16
17 package gate.jape;
18
19 import java.util.ArrayList;
20 import java.util.Iterator;
21
22 import gate.AnnotationSet;
23 import gate.Document;
24 import gate.annotation.AnnotationSetImpl;
25 import gate.util.Strings;
26
27
28 /**
29 * A sequence of conjunctions of PatternElement that form a
30 * disjunction.
31 */
32 public class ConstraintGroup
33 extends PatternElement implements JapeConstants, java.io.Serializable
34 {
35 /** Debug flag */
36 private static final boolean DEBUG = false;
37
38 /** Anonymous constructor. */
39 public ConstraintGroup() {
40 patternElementDisjunction1 = new ArrayList();
41 currentConjunction = new ArrayList();
42 patternElementDisjunction1.add(currentConjunction);
43 } // Anonymous constructor
44
45 /** Need cloning for processing of macro references. See comments on
46 * <CODE>PatternElement.clone()</CODE>
47 */
48 public Object clone() {
49 ConstraintGroup newPE = (ConstraintGroup) super.clone();
50
51 // created by createDisjunction
52 newPE.currentConjunction = null;
53
54 newPE.patternElementDisjunction1 = new ArrayList();
55 // for each (conjunction) member of the pattern element discjunction
56 for(
57 Iterator disjunction = patternElementDisjunction1.iterator();
58 disjunction.hasNext();
59
60 ) {
61
62 newPE.createDisjunction();
63 // for each pattern element making up this conjunction
64 for(
65 Iterator conjunction = ((ArrayList) disjunction.next()).iterator();
66 conjunction.hasNext();
67
68 ) {
69 PatternElement pat = (PatternElement) conjunction.next();
70
71 newPE.addPatternElement((PatternElement) pat.clone());
72 } // for each element of the conjunction
73 } // for each conjunction (element of the disjunction)
74
75 return newPE;
76 } // clone
77
78 /** An array of arrays that represent PatternElement conjunctions
79 * during parsing of the .jape. Each conjunction is
80 * considered as being disjunct with the next. (I.e. they are
81 * or'd, in the same way as expressions around "||" in C and
82 * Java.) Set during parsing; replaced by finish().
83 */
84 private ArrayList patternElementDisjunction1;
85
86 /** The pattern element disjunction for transduction - Java arrays. */
87 private PatternElement[][] patternElementDisjunction2;
88
89 /** An array of PatternElements making up a conjunction. It is a member of
90 * patternElementDisjunction. This is the one we're adding to
91 * at present. Used during parsing, not matching.
92 */
93 private ArrayList currentConjunction;
94
95 /** Make a new disjunction at this point. */
96 public void createDisjunction() {
97 currentConjunction = new ArrayList();
98 patternElementDisjunction1.add(currentConjunction);
99 } // createDisjunction
100
101 /** Add an element to the current conjunction. */
102 public void addPatternElement(PatternElement pe) {
103 currentConjunction.add(pe);
104 } // addPatternElement
105
106 /** Get an list of CPEs that we contain. */
107 protected Iterator getCPEs() {
108 ArrayList cpes = new ArrayList();
109
110 // for each (conjunction) member of the pattern element discjunction
111 for(
112 Iterator disjunction = patternElementDisjunction1.iterator();
113 disjunction.hasNext();
114 ) {
115 // for each pattern element making up this conjunction
116 for(
117 Iterator conjunction = ((ArrayList) disjunction.next()).iterator();
118 conjunction.hasNext();
119 ) {
120 PatternElement pat = (PatternElement) conjunction.next();
121
122 Iterator i = null;
123 if(pat instanceof ComplexPatternElement) {
124 cpes.add(pat);
125 i = ((ComplexPatternElement) pat).getCPEs();
126 }
127 else if(pat instanceof ConstraintGroup)
128 i = ((ConstraintGroup) pat).getCPEs();
129
130 if(i != null)
131 for( ; i.hasNext(); )
132 cpes.add(i.next());
133 } // for each element of the conjunction
134 } // for each conjunction (element of the disjunction)
135
136 return cpes.iterator();
137 } // getCPEs
138
139 /** Finish: replace dynamic data structures with Java arrays; called
140 * after parsing.
141 */
142 public void finish() {
143
144 // index into patternElementDisjunction2
145 int i = 0;
146
147 // index into the conjunctions (second dimension of pED2)
148 int j = 0;
149
150 patternElementDisjunction2 =
151 new PatternElement[patternElementDisjunction1.size()][];
152
153 // for each (conjunction) member of the pattern element discjunction
154 for(
155 Iterator disjuncIter = patternElementDisjunction1.iterator();
156 disjuncIter.hasNext();
157 i++
158 ) {
159 ArrayList conjunction = (ArrayList) disjuncIter.next();
160 patternElementDisjunction2[i] = new PatternElement[conjunction.size()];
161 j = 0;
162
163 // for each pattern element making up this conjunction
164 for(
165 Iterator conjIter = conjunction.iterator();
166 conjIter.hasNext();
167 j++
168 ) {
169 patternElementDisjunction2[i][j] = (PatternElement) conjIter.next();
170 patternElementDisjunction2[i][j].finish();
171 } // loop on conjunction
172
173 } // loop on patternElementDisjunction1
174
175 patternElementDisjunction1 = null;
176 } // finish
177
178 /** Access to the annotations that have been matched by this group. */
179 public AnnotationSet getMatchedAnnots() {
180 AnnotationSet matchedAnnots = new AnnotationSetImpl((Document) null);
181 int pEDLen = patternElementDisjunction2.length;
182
183 // for each (conjunction) member of the pattern element disjunction
184 for(int i = 0; i < pEDLen; i++) {
185 int conjLen = patternElementDisjunction2[i].length;
186
187 // for each pattern element making up this conjunction
188 for(int j = 0; j < conjLen; j++) {
189 PatternElement pat = patternElementDisjunction2[i][j];
190 AnnotationSet patMatchedAnnots = pat.getMatchedAnnots();
191 if(patMatchedAnnots != null)
192 matchedAnnots.addAll(pat.getMatchedAnnots());
193 } // for each element of the conjunction
194
195 } // for each conjunction (element of the disjunction)
196
197 return matchedAnnots;
198 } // getMatchedAnnots
199
200
201 /** Clear all the annotations that have been matched by this group. */
202 public void reset() {
203 // Debug.pr(this, "CG reset, matchHistory.size() = " + matchHistory.size());
204 int pEDLen = patternElementDisjunction2.length;
205
206 // for each (conjunction) member of the pattern element disjunction
207 for(int i = 0; i < pEDLen; i++) {
208 int conjLen = patternElementDisjunction2[i].length;
209
210 // for each pattern element making up this conjunction
211 for(int j = 0; j < conjLen; j++)
212 patternElementDisjunction2[i][j].reset();
213 }
214
215 super.reset(); // should be redundant: there for if PE.reset changes
216 } // reset
217
218 /** Multilevel rollback of annot caches etc. */
219 public void rollback(int arity) {
220 // Debug.pr(this, "CG rollback(" + arity + "), matchHistory.size() = " +
221 // matchHistory.size());
222 for(int i=0; i<arity; i++) {
223 PatternElement[] conjunction = (PatternElement[]) matchHistory.pop();
224 int conjLen = conjunction.length;
225 for(int j = 0; j < conjLen; j++)
226 conjunction[j].rollback(1);
227 }
228 } // rollback
229
230
231 /** Does this element match the document at this position? */
232 public boolean matches(
233 Document doc, int position, MutableInteger newPosition
234 ) {
235 // if a whole conjunction matches, we set newPosition to the max of
236 // rightmost advance of all the composite elements that matched, and
237 // position.
238 int rightmostAdvance = position;
239
240 // when we fail the whole disjunction, we set newPosition to the max of
241 // leftmost failure point, and position
242 int leftmostFailurePoint = Integer.MAX_VALUE;
243
244 // outerLoop:
245 // for each conjunction
246 // for each element in the conjunction
247 // if it fails continue outerLoop;
248 // return true;
249 // return false;
250
251 // for each member of the disjunctions array
252 int savedPosition = position;
253 int pEDLen = patternElementDisjunction2.length;
254 outerLoop:
255 for(int i = 0; i < pEDLen; i++) {
256 int conjLen = patternElementDisjunction2[i].length;
257 position = savedPosition;
258 rightmostAdvance = position;
259
260 // for each pattern element making up this conjunction
261 for(int j = 0; j < conjLen; j++) {
262 PatternElement pat = patternElementDisjunction2[i][j];
263
264 if(! pat.matches(doc, position, newPosition)) {
265 // reset the last failure point to the furthest we got so far
266 leftmostFailurePoint =
267 Math.min(leftmostFailurePoint, newPosition.value);
268
269 // rollback matches done in the previous elements of this conjunction
270 for(int k = j - 1; k >= 0; k--)
271 patternElementDisjunction2[i][k].rollback(1);
272
273 // try the next conjunction
274 continue outerLoop;
275 }
276
277 // reset our advance point to the furthest so far
278 position = rightmostAdvance =
279 Math.max(rightmostAdvance, newPosition.value);
280
281 } // for each element of the conjunction
282
283 // a whole conjunction matched: record advance and which conj succeeded
284 newPosition.value = rightmostAdvance;
285 matchHistory.push(patternElementDisjunction2[i]);
286 //Debug.pr(this, "CG matches: pushing");
287 return true;
288
289 } // for each conjunction (element of the disjunction)
290
291 // we reached the end of the disjunction without matching a
292 // whole conjunction
293 if(leftmostFailurePoint == Integer.MAX_VALUE)
294 leftmostFailurePoint = position + 1;
295 newPosition.value = Math.max(position + 1, leftmostFailurePoint);
296 return false; // annot caches have been rolled back already in inner loop
297 } // matches
298
299
300 /** Create a string representation of the object. */
301 public String toString() { return toString(""); }
302
303 /** Create a string representation of the object. */
304 public String toString(String pad) {
305 String newline = Strings.getNl();
306
307 StringBuffer buf =
308 new StringBuffer(pad + "CG: disjunction(" + newline);
309 String newPad = Strings.addPadding(pad, INDENT_PADDING);
310
311 boolean firstTime = true;
312
313 if(patternElementDisjunction1 != null) { // before finish()
314 // for each (conjunction) member of the pattern element discjunction
315 for(
316 Iterator disjunction = patternElementDisjunction1.iterator();
317 disjunction.hasNext();
318 ) {
319 if(firstTime) firstTime = false;
320 else buf.append(newline + pad + "|" + newline);
321
322 // for each pattern element making up this conjunction
323 for(
324 Iterator conjunction = ((ArrayList) disjunction.next()).iterator();
325 conjunction.hasNext();
326 ) {
327 buf.append(
328 ((PatternElement) conjunction.next()).toString(newPad) + newline
329 );
330 } // for each element of the conjunction
331 } // for each conjunction (element of the disjunction)
332
333 } else { // after finish
334 int pEDLen = patternElementDisjunction2.length;
335 if(firstTime) firstTime = false;
336 else buf.append(newline + pad + "|" + newline);
337
338 for(int i = 0; i < pEDLen; i++) {
339 int conjLen = patternElementDisjunction2[i].length;
340 // for each pattern element making up this conjunction
341 for(int j = 0; j < conjLen; j++)
342 buf.append(
343 patternElementDisjunction2[i][j].toString(newPad) + newline
344 );
345 }
346 }
347
348 buf.append(pad + ") CG." + newline);
349
350 return buf.toString();
351 } // toString
352
353
354 //needed by FSM
355 public PatternElement[][] getPatternElementDisjunction(){
356 return patternElementDisjunction2;
357 }
358
359 } // class ConstraintGroup
360
361
362 // $Log: ConstraintGroup.java,v $
363 // Revision 1.10 2004/07/21 17:10:07 akshay
364 // Changed copyright from 1998-2001 to 1998-2004
365 //
366 // Revision 1.9 2004/03/25 13:01:15 valyt
367 // Imports optimisation throughout the Java sources
368 // (to get rid of annoying warnings in Eclipse)
369 //
370 // Revision 1.8 2001/09/13 12:09:50 kalina
371 // Removed completely the use of jgl.objectspace.Array and such.
372 // Instead all sources now use the new Collections, typically ArrayList.
373 // I ran the tests and I ran some documents and compared with keys.
374 // JAPE seems to work well (that's where it all was). If there are problems
375 // maybe look at those new structures first.
376 //
377 // Revision 1.7 2001/09/12 11:59:33 kalina
378 // Changed the old JAPE stuff to use the new Collections API,
379 // instead of com.objectspace stuff. Will eliminate that library
380 // completely very soon! Just one class left to re-implement,
381 //
382 // ParseCPSL.jj changed accordingly. All tested and no smoke.
383 //
384 // Revision 1.6 2000/11/08 16:35:02 hamish
385 // formatting
386 //
387 // Revision 1.5 2000/10/26 10:45:30 oana
388 // Modified in the code style
389 //
390 // Revision 1.4 2000/10/16 16:44:33 oana
391 // Changed the comment of DEBUG variable
392 //
393 // Revision 1.3 2000/10/10 15:36:35 oana
394 // Changed System.out in Out and System.err in Err;
395 // Added the DEBUG variable seted on false;
396 // Added in the header the licence;
397 //
398 // Revision 1.2 2000/04/14 18:02:46 valyt
399 // Added some gate.fsm classes
400 // added some accessor function in old jape classes
401 //
402 // Revision 1.1 2000/02/23 13:46:06 hamish
403 // added
404 //
405 // Revision 1.1.1.1 1999/02/03 16:23:01 hamish
406 // added gate2
407 //
408 // Revision 1.17 1998/11/24 16:18:29 hamish
409 // fixed toString for calls after finish
410 //
411 // Revision 1.16 1998/11/01 21:21:36 hamish
412 // use Java arrays in transduction where possible
413 //
414 // Revision 1.15 1998/11/01 14:55:54 hamish
415 // fixed lFP setting in matches
416 //
417 // Revision 1.14 1998/10/30 14:06:45 hamish
418 // added getTransducer
419 //
420 // Revision 1.13 1998/10/29 12:07:49 hamish
421 // toString change
422 //
423 // Revision 1.12 1998/10/06 16:16:10 hamish
424 // negation percolation during constrain add; position advance when none at end
425 //
426 // Revision 1.11 1998/10/01 16:06:30 hamish
427 // new appelt transduction style, replacing buggy version
428 //
429 // Revision 1.10 1998/09/26 09:19:16 hamish
430 // added cloning of PE macros
431 //
432 // Revision 1.9 1998/09/17 16:48:31 hamish
433 // added macro defs and macro refs on LHS
434 //
435 // Revision 1.8 1998/08/12 19:05:43 hamish
436 // fixed multi-part CG bug; set reset to real reset and fixed multi-doc bug
437 //
438 // Revision 1.7 1998/08/12 15:39:35 hamish
439 // added padding toString methods
440 //
441 // Revision 1.6 1998/08/05 21:58:06 hamish
442 // backend works on simple test
443 //
444 // Revision 1.5 1998/08/03 19:51:20 hamish
445 // rollback added
446 //
447 // Revision 1.4 1998/07/31 13:12:16 hamish
448 // done RHS stuff, not tested
449 //
450 // Revision 1.3 1998/07/30 11:05:16 hamish
451 // more jape
452 //
453 // Revision 1.2 1998/07/29 11:06:56 hamish
454 // first compiling version
455 //
456 // Revision 1.1.1.1 1998/07/28 16:37:46 hamish
457 // gate2 lives
458