| GateEventStream.java |
1 /*
2 * Copyright (c) 2004, The University of Sheffield.
3 *
4 * This file is part of GATE (see http://gate.ac.uk/), and is free
5 * software, licenced under the GNU Library General Public License,
6 * Version 2, June 1991 (in the distribution as file licence.html,
7 * and also available at http://gate.ac.uk/gate/licence.html).
8 *
9 * Mike Dowman 30-03-2004
10 *
11 * $Id: GateEventStream.java,v 1.2 2004/04/28 11:25:29 valyt Exp $
12 *
13 */
14
15 package gate.creole.ml.maxent;
16
17 /**
18 * This class is used by MaxentWrapper. When created, it is passed a data
19 * structure containg all the training data for the classifier. It can then
20 * provide this data to the maxent model itself, as needed.
21 */
22 public class GateEventStream implements opennlp.maxent.EventStream {
23
24 boolean DEBUG=false;
25
26 final java.util.List trainingData;
27 final int indexOfOutcome;
28
29 int index=0;
30
31 /**
32 * This constructor stores all the training data in the object when the object
33 * is created.
34 *
35 * @param newTrainingData A List of Lists of String objects. Each String is
36 * a maxent feature or outcome.
37 * @param newIndexOfOutcome This is the index of the String objects that are
38 * the outcomes.
39 */
40 GateEventStream(java.util.List newTrainingData, int newIndexOfOutcome) {
41 trainingData=newTrainingData;
42 indexOfOutcome=newIndexOfOutcome;
43 }
44
45 /**
46 * Extract the next instance from those stored in this object, and advance
47 * the objects internal index to point at the next instance.
48 *
49 * An exception will be thrown if this method is called when there are no
50 * more instances to extract.
51 *
52 * @return The next instance.
53 */
54 public opennlp.maxent.Event nextEvent() {
55 ++index;
56 return instance2Event((java.util.List)trainingData.get(index-1));
57 }
58
59 /**
60 * See whether there are any more instances to be extracted from this object.
61 *
62 * @return true if there are more instances, false otherwise.
63 */
64 public boolean hasNext() {
65 return index<trainingData.size();
66 }
67
68 /**
69 * Convert an instance into an Event object, taking note of the position of
70 * the outcome (class attribute) stored in this object.
71 *
72 * @param instance The instance in the form of a list of String objects.
73 * @return A maxent Event object containing the outcome (class attribute) and
74 * the features (other attributes).
75 */
76 private opennlp.maxent.Event instance2Event(java.util.List instance) {
77 // Store the outcome separately - and make sure that if it's null then
78 // it gets converted to the String "null".
79 java.lang.String outcome=""+(java.lang.String)instance.get(indexOfOutcome);
80
81 // Then make a new list which doesn't contain the outcome.
82 java.util.List features=
83 new java.util.ArrayList(instance.subList(0, indexOfOutcome));
84 features.addAll(instance.subList(indexOfOutcome+1, instance.size()));
85
86 if (DEBUG) {
87 System.out.println("New event: outcome="+outcome);
88 System.out.println("features="+instance);
89 }
90
91 // Now make the Event and return it.
92 return new opennlp.maxent.Event(outcome,
93 (String[])features.toArray(new String[0]));
94 }
95 }
96
97