| Document.java |
1 /*
2 * Document.java
3 *
4 * Copyright (c) 1998-2004, The University of Sheffield.
5 *
6 * This file is part of GATE (see http://gate.ac.uk/), and is free
7 * software, licenced under the GNU Library General Public License,
8 * Version 2, June 1991 (in the distribution as file licence.html,
9 * and also available at http://gate.ac.uk/gate/licence.html).
10 *
11 * Hamish Cunningham, 19/Jan/2000
12 *
13 * $Id: Document.java,v 1.40 2004/07/23 11:33:20 kalina Exp $
14 */
15
16 package gate;
17
18 import java.net.URL;
19 import java.util.Map;
20 import java.util.Set;
21
22 import gate.event.DocumentListener;
23 import gate.util.InvalidOffsetException;
24
25
26 /** Represents the commonalities between all sorts of documents.
27 */
28 public interface Document extends SimpleDocument {
29
30 /**
31 * The parameter name that determines whether or not a document is markup aware
32 */
33 public static final String
34 DOCUMENT_MARKUP_AWARE_PARAMETER_NAME = "markupAware";
35
36 public static final String
37 DOCUMENT_ENCODING_PARAMETER_NAME = "encoding";
38
39 public static final String
40 DOCUMENT_PRESERVE_CONTENT_PARAMETER_NAME = "preserveOriginalContent";
41
42 public static final String
43 DOCUMENT_STRING_CONTENT_PARAMETER_NAME = "stringContent";
44
45 public static final String
46 DOCUMENT_REPOSITIONING_PARAMETER_NAME = "collectRepositioningInfo";
47
48 public static final String
49 DOCUMENT_START_OFFSET_PARAMETER_NAME = "sourceUrlStartOffset";
50
51 public static final String
52 DOCUMENT_END_OFFSET_PARAMETER_NAME = "sourceUrlEndOffset";
53
54 /** Documents may be packed within files; in this case an optional pair of
55 * offsets refer to the location of the document.
56 */
57 public Long[] getSourceUrlOffsets();
58
59 /** Documents may be packed within files; in this case an optional pair of
60 * offsets refer to the location of the document. This method gets the
61 * start offset.
62 */
63 public Long getSourceUrlStartOffset();
64
65 /** Documents may be packed within files; in this case an optional pair of
66 * offsets refer to the location of the document. This method gets the
67 * end offset.
68 */
69 public Long getSourceUrlEndOffset();
70
71 /** Returns a map with the named annotation sets
72 */
73 public Map getNamedAnnotationSets();
74
75 /** Make the document markup-aware. This will trigger the creation
76 * of a DocumentFormat object at Document initialisation time; the
77 * DocumentFormat object will unpack the markup in the Document and
78 * add it as annotations. Documents are <B>not</B> markup-aware by default.
79 *
80 * @param b markup awareness status.
81 */
82 public void setMarkupAware(Boolean b);
83
84 /** Get the markup awareness status of the Document.
85 *
86 * @return whether the Document is markup aware.
87 */
88 public Boolean getMarkupAware();
89
90 /**
91 * Allow/disallow preserving of the original document content.
92 * If is <B>true</B> the original content will be retrieved from
93 * the DocumentContent object and preserved as document feature.
94 */
95 public void setPreserveOriginalContent(Boolean b);
96
97 /** Get the preserving of content status of the Document.
98 *
99 * @return whether the Document should preserve it's original content.
100 */
101 public Boolean getPreserveOriginalContent();
102
103 /**
104 * Allow/disallow collecting of repositioning information.
105 * If is <B>true</B> information will be retrieved and preserved
106 * as document feature.<BR>
107 * Preserving of repositioning information give the possibilities
108 * for converting of coordinates between the original document content and
109 * extracted from the document text.
110 */
111 public void setCollectRepositioningInfo(Boolean b);
112
113 /** Get the collectiong and preserving of repositioning information
114 * for the Document. <BR>
115 * Preserving of repositioning information give the possibilities
116 * for converting of coordinates between the original document content and
117 * extracted from the document text.
118 *
119 * @return whether the Document should collect and preserve information.
120 */
121 public Boolean getCollectRepositioningInfo();
122
123 /** Returns a GateXml document. This document is actually a serialization of
124 * a Gate Document in XML.
125 * @return a string representing a Gate Xml document
126 */
127 public String toXml();
128
129 /** Returns an XML document aming to preserve the original markups(
130 * the original markup will be in the same place and format as it was
131 * before processing the document) and include (if possible)
132 * the annotations specified in the aSourceAnnotationSet.
133 * <b>Warning:</b> Annotations from the aSourceAnnotationSet will be lost
134 * if they will cause a crosed over situation.
135 * @param aSourceAnnotationSet is an annotation set containing all the
136 * annotations that will be combined with the original marup set.
137 * @param includeFeatures determines whether or not features and gate IDs
138 * of the annotations should be included as attributes on the tags or not.
139 * If false, then only the annotation types are exported as tags, with no
140 * attributes.
141 * @return a string representing an XML document containing the original
142 * markup + dumped annotations form the aSourceAnnotationSet
143 */
144 public String toXml(Set aSourceAnnotationSet, boolean includeFeatures);
145
146 /**
147 * Equivalent to toXml(aSourceAnnotationSet, true).
148 */
149 public String toXml(Set aSourceAnnotationSet);
150
151 /** Make changes to the content.
152 */
153 public void edit(Long start, Long end, DocumentContent replacement)
154 throws InvalidOffsetException;
155
156 /**
157 * Adds a {@link gate.event.DocumentListener} to this document.
158 * All the registered listeners will be notified of changes occured to the
159 * document.
160 */
161 public void addDocumentListener(DocumentListener l);
162
163 /**
164 * Removes one of the previously registered document listeners.
165 */
166 public void removeDocumentListener(DocumentListener l);
167
168
169 /** Documents may be packed within files; in this case an optional pair of
170 * offsets refer to the location of the document. This method sets the
171 * end offset.
172 */
173 public void setSourceUrlEndOffset(Long sourceUrlEndOffset);
174
175
176 /** Documents may be packed within files; in this case an optional pair of
177 * offsets refer to the location of the document. This method sets the
178 * start offset.
179 */
180 public void setSourceUrlStartOffset(Long sourceUrlStartOffset);
181
182 } // interface Document
183
184