| TestJape.java |
1 /*
2 * TestJape.java
3 *
4 * Copyright (c) 1998-2004, The University of Sheffield.
5 *
6 * This file is part of GATE (see http://gate.ac.uk/), and is free
7 * software, licenced under the GNU Library General Public License,
8 * Version 2, June 1991 (in the distribution as file licence.html,
9 * and also available at http://gate.ac.uk/gate/licence.html).
10 *
11 * Hamish Cunningham, 23/Feb/00
12 *
13 * $Id: TestJape.java,v 1.49 2004/08/06 16:08:26 valyt Exp $
14 */
15
16 package gate.jape;
17
18 import java.io.IOException;
19 import java.util.Date;
20 import java.util.Iterator;
21
22 import junit.framework.*;
23
24 import gate.*;
25 import gate.creole.ResourceInstantiationException;
26 import gate.creole.gazetteer.DefaultGazetteer;
27 import gate.creole.tokeniser.DefaultTokeniser;
28 import gate.util.*;
29
30
31
32 /** Tests for the Corpus classes
33 */
34 public class TestJape extends TestCase
35 {
36 /** Debug flag */
37 private static final boolean DEBUG = false;
38
39 /** Construction */
40 public TestJape(String name) { super(name); }
41
42 /** Fixture set up */
43 public void setUp() {
44 //Out.println("TestJape.setUp()");
45 } // setUp
46
47 /** Test using the large "combined" grammar from the gate/resources
48 * tree.
49 */
50 public void _testCombined() throws IOException, GateException, Exception {
51 DoTestBigGrammar("AveShort");
52
53 /*
54 Corpus c = Factory.newCorpus("TestJape corpus");
55 c.add(
56 Factory.newDocument(Files.getResourceAsString("texts/doc0.html"))
57 );
58
59 //add some annotations on the first (only) document in corpus c
60 Document doc = (Document) c.first();
61 AnnotationSet defaultAS = doc.getAnnotations();
62 FeatureMap feat = Factory.newFeatureMap();
63 defaultAS.add(new Long( 2), new Long( 4), "A",feat);
64 defaultAS.add(new Long( 4), new Long(6), "B",feat);
65 defaultAS.add(new Long(6), new Long(8), "C",feat);
66 defaultAS.add(new Long(8), new Long(10), "C",feat);
67
68 // run the parser test
69 Gate.init();
70 Batch batch = null;
71 batch = new Batch("jape/combined/", "main.jape");
72
73 // test the transducers
74 batch.transduce(c);
75 //Out.println(batch.getTransducer());
76
77 // check the results
78 doc = (Document)c.first();
79 */
80 } // testCombined()
81
82 /** Batch run */
83 public void testBatch() throws Exception{
84 Corpus c = Factory.newCorpus("TestJape corpus");
85 c.add(
86 Factory.newDocument(Files.getGateResourceAsString("texts/doc0.html"))
87 );
88 //add some annotations on the first (only) document in corpus c
89 Document doc = (Document)c.get(0);
90 AnnotationSet defaultAS = doc.getAnnotations();
91
92 try {
93 FeatureMap feat = Factory.newFeatureMap();
94 // defaultAS.add(new Long( 0), new Long( 2), "A",feat);
95 defaultAS.add(new Long( 2), new Long( 4), "A",feat);
96 // defaultAS.add(new Long( 4), new Long( 6), "A",feat);
97 // defaultAS.add(new Long( 6), new Long( 8), "A",feat);
98 defaultAS.add(new Long( 4), new Long(6), "B",feat);
99 // defaultAS.add(new Long(10), new Long(12), "B",feat);
100 // defaultAS.add(new Long(12), new Long(14), "B",feat);
101 // defaultAS.add(new Long(14), new Long(16), "B",feat);
102 // defaultAS.add(new Long(16), new Long(18), "B",feat);
103 defaultAS.add(new Long(6), new Long(8), "C",feat);
104 defaultAS.add(new Long(8), new Long(10), "C",feat);
105 // defaultAS.add(new Long(22), new Long(24), "C",feat);
106 // defaultAS.add(new Long(24), new Long(26), "C",feat);
107 } catch(gate.util.InvalidOffsetException ioe) {
108 ioe.printStackTrace(Err.getPrintWriter());
109 }
110 /*
111 // run the parser test
112 Batch batch = null;
113 // String japeFileName = "/gate/jape/Test11.jape";
114 String japeFileName = Files.getResourcePath() + "/jape/TestABC.jape";
115 // String japeFileName = "/gate/jape/Country.jape";
116 InputStream japeFileStream = Files.getResourceAsStream(japeFileName);
117 if(japeFileStream == null)
118 throw new JapeException("couldn't open " + japeFileName);
119 */
120 Batch batch = new Batch(TestJape.class.getResource(
121 Files.getResourcePath() + "/jape/TestABC.jape"), "UTF-8");
122 // test code: print the first line of the jape stream
123 // Out.println(
124 // new BufferedReader(new InputStreamReader(japeFileStream)).readLine()
125 // );
126
127 // test the transducers
128 batch.transduce(c);
129 // check the results
130 doc = (Document)c.get(0);
131 // defaultAS = doc.getAnnotations();
132 // Out.println(defaultAS);
133 } // testBatch()
134
135 public void DoTestBigGrammar(String textName) throws GateException, Exception{
136 long startCorpusLoad = 0, startCorpusTokenization = 0,
137 startGazeteerLoad = 0, startLookup = 0,
138 startJapeFileOpen = 0, startCorpusTransduce = 0,
139 endProcess = 0;
140 Out.print("Procesing " + textName + "...\n" +
141 "Started at: " + (new Date()) + "\n");
142 startCorpusLoad = System.currentTimeMillis();
143 Out.print("Loading corpus... ");
144 Corpus corpus = Factory.newCorpus("Jape Corpus");
145 try {
146 corpus.add(Factory.newDocument(
147 Files.getGateResourceAsString("jape/InputTexts/" + textName)));
148 } catch(IOException ioe) {
149 ioe.printStackTrace(Err.getPrintWriter());
150 }
151
152 if(corpus.isEmpty()) {
153 Err.println("Missing corpus !");
154 return;
155 }
156
157 //tokenize all documents
158 gate.creole.tokeniser.DefaultTokeniser tokeniser = null;
159 try {
160 //create a default tokeniser
161 FeatureMap params = Factory.newFeatureMap();
162 tokeniser = (DefaultTokeniser) Factory.createResource(
163 "gate.creole.tokeniser.DefaultTokeniser", params);
164 /*Files.getResourceAsStream("creole/tokeniser/DefaultTokeniser.rules"));*/
165 } catch(ResourceInstantiationException re) {
166 re.printStackTrace(Err.getPrintWriter());
167 }
168 startCorpusTokenization = System.currentTimeMillis();
169 Out.print(": " +
170 (startCorpusTokenization - startCorpusLoad) +
171 "ms\n");
172
173 Out.print("Tokenizing the corpus... ");
174 int progress = 0;
175 int docCnt = corpus.size();
176 Iterator docIter = corpus.iterator();
177 Document currentDoc;
178 while(docIter.hasNext()){
179 currentDoc = (Document)docIter.next();
180 tokeniser.setDocument(currentDoc);
181 //use the default anotation set
182 tokeniser.setAnnotationSetName(null);
183 tokeniser.execute();
184 // Verfy if all annotations from the default annotation set are consistent
185 gate.corpora.TestDocument.verifyNodeIdConsistency(currentDoc);
186 }
187
188 startJapeFileOpen = System.currentTimeMillis();
189 Out.print(": " + (startJapeFileOpen - startCorpusTokenization) +
190 "ms\n");
191
192 //Do gazeteer lookup
193 gate.creole.gazetteer.DefaultGazetteer gazeteer = null;
194 startGazeteerLoad = startLookup = System.currentTimeMillis();
195 Out.print("Loading gazeteer lists...");
196 try {
197 //create a default gazetteer
198 FeatureMap params = Factory.newFeatureMap();
199 gazeteer = (DefaultGazetteer) Factory.createResource(
200 "gate.creole.gazetteer.DefaultGazetteer", params);
201 gazeteer.init();
202 startLookup = System.currentTimeMillis();
203 Out.print(": " +
204 (startLookup - startGazeteerLoad) +
205 "ms\n");
206
207 Out.print("Doing gazeteer lookup... ");
208 docIter = corpus.iterator();
209 while(docIter.hasNext()){
210 currentDoc = (Document)docIter.next();
211 gazeteer.setDocument(currentDoc);
212 gazeteer.execute();
213 // Verfy if all annotations from the default annotation set are consistent
214 gate.corpora.TestDocument.verifyNodeIdConsistency(currentDoc);
215 }
216 } catch(ResourceInstantiationException re) {
217 Err.println("Cannot read the gazeteer lists!" +
218 "\nAre the GATE resources in place?\n" + re);
219 }
220
221 startJapeFileOpen = System.currentTimeMillis();
222 Out.print(": " + (startJapeFileOpen - startLookup) +
223 "ms\n");
224
225
226 //do the jape stuff
227 Gate.init();
228
229
230 try {
231 Out.print("Opening Jape grammar... ");
232 Batch batch = new Batch(TestJape.class.getResource(
233 Files.getResourcePath() + "/jape/combined/main.jape"), "UTF-8");
234 /*
235 Batch batch = new Batch("jape/combined/", "brian-soc-loc1.jape");
236 Batch batch =
237 new Batch("z:/gate/src/gate/resources/jape/combined/main.jape");
238 Batch batch = new Batch("jape/", "Country.jape");
239 */
240 startCorpusTransduce = (new Date()).getTime();
241 Out.print(": " + (startCorpusTransduce - startJapeFileOpen) +
242 "ms\n");
243 Out.print("Transducing the corpus... ");
244 batch.transduce(corpus);
245 endProcess = System.currentTimeMillis();
246 Out.print(": " + (endProcess - startCorpusTransduce) + "ms\n");
247 } catch(JapeException je) {
248 je.printStackTrace(Err.getPrintWriter());
249 }
250 } // DoBugTestGrammar
251
252 /**
253 * This test sets up a JAPE transducer based on a grammar
254 * (RhsError.jape) that will throw a null pointer exception.
255 * The test succeeds so long as we get that exception.
256 */
257 public void testRhsErrorMessages() {
258 boolean gotException = false;
259
260 try {
261 if(DEBUG) {
262 Out.print(
263 "Opening Jape grammar... " + Gate.getUrl("tests/RhsError.jape")
264 );
265 }
266 // a JAPE batcher
267 Batch batch = new Batch(Gate.getUrl("tests/RhsError.jape"), "UTF-8");
268
269 // a document with an annotation
270 Document doc = Factory.newDocument("This is a Small Document.");
271 FeatureMap features = Factory.newFeatureMap();
272 features.put("orth", "upperInitial");
273 doc.getAnnotations().add(new Long(0), new Long(8), "Token", features);
274
275 // run jape on the document
276 batch.transduce(doc);
277 } catch(Exception e) {
278 if(DEBUG) Out.prln(e);
279 gotException = true;
280 }
281
282 assertTrue("Bad JAPE grammar didn't throw an exception", gotException);
283
284 } // testRhsErrorMessages
285
286 // /**
287 // * This test sets up a JAPE transducer based on a grammar
288 // * (RhsError2.jape) that will throw a compiler error.
289 // * The test succeeds so long as we get that exception.
290 // */
291 // public void testRhsErrorMessages2() {
292 // boolean gotException = false;
293 //
294 // // disable System.out so that the compiler can't splash its error on screen
295 // if(DEBUG) System.out.println("hello 1");
296 // PrintStream sysout = System.out;
297 // System.setOut(new PrintStream(new ByteArrayOutputStream()));
298 // if(DEBUG) System.out.println("hello 2");
299 //
300 // // run a JAPE batch on the faulty grammar
301 // try {
302 // if(DEBUG) {
303 // Out.print(
304 // "Opening Jape grammar... " + Gate.getUrl("tests/RhsError2.jape")
305 // );
306 // }
307 // // a JAPE batcher
308 // Batch batch = new Batch(Gate.getUrl("tests/RhsError2.jape"), "UTF-8");
309 // } catch(Exception e) {
310 // if(DEBUG) Out.prln(e);
311 // gotException = true;
312 // } finally {
313 //
314 // // re-enable System.out
315 // System.setOut(sysout);
316 // if(DEBUG) System.out.println("hello 3");
317 // }
318 //
319 // assertTrue("Bad JAPE grammar (2) didn't throw an exception", gotException);
320 //
321 // } // testRhsErrorMessages2
322 //
323
324 /** Test suite routine for the test runner */
325 public static Test suite() {
326 return new TestSuite(TestJape.class);
327 } // suite
328
329 //main method for running this test as a standalone test
330 public static void main(String[] args) {
331 for(int i = 0; i < 6; i++){
332 System.gc();
333 Out.println("Run " + i + " ==============");
334 try{
335 TestJape testJape = new TestJape("Test Jape");
336 testJape.setUp();
337 if(args.length < 1) testJape.DoTestBigGrammar("AveShort");
338 else testJape.DoTestBigGrammar(args[0]);
339 } catch(Exception e) {
340 e.printStackTrace(Err.getPrintWriter());
341 }
342 }
343 }
344 } // class TestJape
345