| ParsingFunctions.java |
1 package gate.creole.morph;
2
3 import java.util.Stack;
4 /**
5 * <p>Title: ParsingFunctions.java </p>
6 * <p>Description: This class provides static utility methods used for
7 * various purposes, such as converting user-defined rules into regular
8 * expressions and finding a variable's type from its value. </p>
9 */
10 public class ParsingFunctions {
11
12 /**
13 * Default Constructor
14 */
15 public ParsingFunctions() {
16
17 }
18
19 /**
20 * This method takes the value of the variable and tells the user
21 * what type of value is from CharacterRange, CharacterSet, StringSet
22 * @param varValue value for which to find the variable type
23 * @return ERROR_CODE = -4, STRING_SET_CODE = 0, CHARACTER_RANGE_CODE = 1,
24 * CHARACTER_SET_CODE = 2;
25 */
26 public static int findVariableType(String varValue) {
27 // if the value starts with " it is string set
28 // if the value starts with "[-" it is a character range
29 // if the value starts with "[" it is a character set
30 // otherwise error
31 if(varValue==null) {
32 return Codes.ERROR_CODE;
33 }
34
35 if(varValue.length()>=3 && varValue.charAt(0)=='\"'
36 && (varValue.lastIndexOf('\"')==(varValue.length()-1))) {
37 // for string set it should be greater than 3 because
38 // it requires at least one character to make the string
39 // first and the last character should be "
40 return Codes.STRING_SET_CODE;
41
42 } else if(varValue.length()>=6 && (((varValue.length()-3)%3)==0)
43 && varValue.substring(0,2).equals("[-")
44 && varValue.charAt(varValue.length()-1)==']') {
45 // for the character range it should be greater than 6 because
46 // three characters as "[-" and "]"
47 // and finally to define the range character-character
48 return Codes.CHARACTER_RANGE_CODE;
49
50 } else if(varValue.length()>=3 && varValue.charAt(0)=='['
51 && varValue.charAt(varValue.length()-1)==']') {
52 // for the character set it should be greater than 3 characters because
53 // it requires at least one character
54 // first and the last character should be [ and ] respectively
55 if(varValue.charAt(1)=='-') {
56 return Codes.ERROR_CODE;
57 } else {
58 return Codes.CHARACTER_SET_CODE;
59 }
60
61 } else {
62 // there are some errors
63 return Codes.ERROR_CODE;
64 }
65
66 }
67
68 /**
69 * This method checks for the string if it is a valid integer value
70 * @param value value to be checked for its type to be integer
71 * @return if value is an integer returns true, false otherwise
72 */
73 public static boolean isInteger(String value) {
74 try {
75 int no = Integer.parseInt(value);
76 } catch(NumberFormatException nfe) {
77 return false;
78 }
79 return true;
80 }
81
82 /**
83 * This method checks for the string if it is a valid integer value
84 * @param value value to be checked for its type to be integer
85 * @return if value is an integer returns true, false otherwise
86 */
87 public static boolean isBoolean(String value) {
88 if(value.equals("false") || value.equals("true")) {
89 return true;
90 } else {
91 return false;
92 }
93 }
94
95 /**
96 * This method convert the expression which has been entered by the user
97 * in the .rul file (i.e. rules defined by the user), into the expression
98 * which are recognized by the regular expression Patterns
99 * @param line rule defined by the user
100 * @param storage this method internally requires values of the used
101 * variables to replace the them with their values in the expression
102 * @return newly generated regular expression
103 */
104 public static String convertToRegExp(String line,Storage storage) {
105 // replace all OR with |
106 line = line.replaceAll("( OR )", "|");
107 line = line.replaceAll("(\\[\\-)","[");
108
109 // we will use the stack concept here
110 // for every occurence of '{', or '(' we will add that into the stack
111 // and for every occurence of '}' or ')' we will remove that element from
112 // the stack
113 // if the value found between the bracket is an integer value
114 // we won't replace those brackets
115 StringBuffer newExpr = new StringBuffer(line);
116 Stack stack = new Stack();
117 Stack bracketIndexes = new Stack();
118
119 for (int i = 0; i < newExpr.length(); i++) {
120 if (newExpr.charAt(i) == '{') {
121 // add it to the stack
122 stack.add("{");
123 bracketIndexes.add(new Integer(i));
124
125 }
126 else if (newExpr.charAt(i) == '(') {
127 // add it to the stack
128 stack.add("(");
129 bracketIndexes.add(new Integer(i));
130
131 }
132 else if (newExpr.charAt(i) == '[') {
133 // add it to the stack
134 stack.add("[");
135 bracketIndexes.add(new Integer(i));
136
137 }
138 else if (newExpr.charAt(i) == '\"') {
139 // before adding it to the stack, check if this is the closing one
140 if (stack.isEmpty() || !(((String)(stack.get(stack.size() - 1))).equals("\""))) {
141 // yes this is the opening one
142 // add it to the stack
143 stack.add("\"");
144 bracketIndexes.add(new Integer(i));
145 } else {
146 // this is the closing one
147 String bracket = (String)(stack.pop());
148 int index = ((Integer)(bracketIndexes.pop())).intValue();
149 newExpr.setCharAt(index, '(');
150 newExpr.setCharAt(i, ')');
151 }
152 }
153 else if (newExpr.charAt(i) == '}') {
154 // remove the element from the stack
155 // it must be '{', otherwise generate the error
156 String bracket = (String) (stack.pop());
157 int index = ((Integer)(bracketIndexes.pop())).intValue();
158 if (!bracket.equals("{")) {
159 return null;
160 }
161
162 // now check if the value between these brackets is integer, that means
163 // we don't need to change the brackets, otherwise change them to
164 // '(' and ')'
165 if (isInteger(newExpr.substring(index + 1, i))) {
166 // yes it is an integer
167 // continue
168 continue;
169 }
170 else {
171 // no it is string
172 newExpr.setCharAt(index, '(');
173 newExpr.setCharAt(i, ')');
174 }
175
176 }
177 else if (newExpr.charAt(i) == ')') {
178 // remove the element from the stack
179 // it must be ')', otherwise generate the error
180 String bracket = (String) (stack.pop());
181 int index = ( (Integer) (bracketIndexes.pop())).intValue();
182 if (!bracket.equals("(")) {
183 return null;
184 }
185 continue;
186 }
187 else if (newExpr.charAt(i) == ']') {
188 // remove the element from the stack
189 // it must be '[', otherwise generate the error
190 String bracket = (String) (stack.pop());
191 int index = ( (Integer) (bracketIndexes.pop())).intValue();
192 if (!bracket.equals("[")) {
193 return null;
194 }
195 }
196 }
197 // check if all the stacks are empty then and only then the written
198 // expression is true, otherwise it is incorrect
199 if(!stack.empty() || !bracketIndexes.empty()) {
200 return null;
201 }
202 //System.out.println(line+" "+newExpr);
203 // now we need to replace the variables with their values
204 // but how would we know which is the variable
205 // so get the variable list and check if it is available in the expression
206 String [] varNames = storage.getVarNames();
207 for(int i=0;i<varNames.length;i++) {
208 // check for the occurance of each varName in the expression
209 int index = -1;
210 String myString = "{[()]} ";
211 while((index=newExpr.indexOf(varNames[i],index+1))!=-1) {
212 //System.out.println(index + " "+newExpr.length());
213 // now check for the left and right characters
214 if(index>0) {
215 if(myString.indexOf(newExpr.charAt(index-1))==-1) {
216 index = index +varNames[i].length()-1;
217 // this is not the varilable
218 continue;
219 }
220 }
221 if((varNames[i].length()+index)<newExpr.length()) {
222 if(myString.indexOf(newExpr.charAt(varNames[i].length()+index))==-1) {
223 index = index +varNames[i].length()-1;
224 // this is not the variable
225 continue;
226 }
227 }
228
229 // yes it is a variable
230 String replaceWith = "("+(String)(storage.get(varNames[i]))+")";
231 newExpr.replace(index,(varNames[i].length()+index),replaceWith);
232 index = index + replaceWith.length();
233 }
234 }
235 return new String(newExpr);
236 }
237 }