001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *     http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.geronimo.javamail.store.imap.connection;
019    
020    import java.util.List;
021    import java.util.ArrayList;
022    import java.util.StringTokenizer;
023    
024    import javax.mail.MessagingException;
025    
026    import org.apache.geronimo.javamail.util.ResponseFormatException; 
027    import org.apache.geronimo.javamail.store.imap.connection.IMAPResponseTokenizer.Token; 
028    
029    /**
030     * Class to represent a FETCH response BODY segment qualifier.  The qualifier is 
031     * of the form "BODY[<section>]<<partial>>".  The optional section qualifier is 
032     * a "." separated part specifiers.  A part specifier is either a number, or 
033     * one of the tokens HEADER, HEADER.FIELD, HEADER.FIELD.NOT, MIME, and TEXT.  
034     * The partial specification is in the form "<start.length>". 
035     *
036     * @version $Rev: 594520 $ $Date: 2007-11-13 07:57:39 -0500 (Tue, 13 Nov 2007) $
037     */
038    public class IMAPBodySection {
039        // the section type qualifiers 
040        static public final int BODY = 0; 
041        static public final int HEADERS = 1; 
042        static public final int HEADERSUBSET = 2; 
043        static public final int MIME = 3;
044        static public final int TEXT = 4; 
045        
046        // the optional part number 
047        public String partNumber = "1"; 
048        // the string name of the section 
049        public String sectionName = ""; 
050        // the section qualifier 
051        public int section; 
052        // the starting substring position 
053        public int start = -1; 
054        // the substring length (requested)
055        public int length = -1; 
056        // the list of any explicit header names 
057        public List headers = null; 
058        
059        /**
060         * Construct a simple-toplevel BodySection tag.
061         * 
062         * @param section The section identifier.
063         */
064        public IMAPBodySection(int section) {
065            this.section = section; 
066            partNumber = "1"; 
067            start = -1; 
068            length = -1; 
069        }
070        
071        /**
072         * construct a BodySegment descriptor from the FETCH returned name. 
073         * 
074         * @param name   The name code, which may be encoded with a section identifier and
075         *               substring qualifiers.
076         * 
077         * @exception MessagingException
078         */
079        public IMAPBodySection(IMAPResponseTokenizer source) throws MessagingException {
080            
081            // this could be just "BODY" alone.  
082            if (!source.peek(false, true).isType('[')) {
083                // complete body, all other fields take default  
084                section = BODY;             
085                return; 
086            }
087            
088            // now we need to scan along this, building up the pieces as we go. 
089            // NOTE:  The section identifiers use "[", "]", "." as delimiters, which 
090            // are normally acceptable in ATOM names.  We need to use the expanded 
091            // delimiter set to parse these tokens off. 
092            Token token = source.next(false, true); 
093            // the first token was the "[", now step to the next token in line. 
094            token = source.next(false, true); 
095            
096            if (token.isType(Token.NUMERIC)) {
097                token = parsePartNumber(token, source); 
098            }
099            
100            // have a potential name here?
101            if (token.isType(Token.ATOM)) {
102                token = parseSectionName(token, source); 
103            }
104            
105            // the HEADER.FIELD and HEADER.FIELD.NOT section types 
106            // are followed by a list of header names. 
107            if (token.isType('(')) {
108                token = parseHeaderList(source); 
109            }
110            
111            // ok, in theory, our current token should be a ']'
112            if (!token.isType(']')) {
113                throw new ResponseFormatException("Invalid section identifier on FETCH response"); 
114            }
115            
116            // do we have a substring qualifier?
117            // that needs to be stripped off too 
118            parseSubstringValues(source); 
119            
120            // now fill in the type information 
121            if (sectionName.equals("")) {
122                section = BODY; 
123            }
124            else if (sectionName.equals("HEADER")) {
125                section = HEADERS; 
126            }
127            else if (sectionName.equals("HEADER.FIELDS")) {
128                section = HEADERSUBSET; 
129            }
130            else if (sectionName.equals("HEADER.FIELDS.NOT")) {
131                section = HEADERSUBSET; 
132            }
133            else if (sectionName.equals("TEXT")) {
134                section = TEXT; 
135            }
136            else if (sectionName.equals("MIME")) {
137                section = MIME; 
138            }
139        }
140        
141        
142        /**
143         * Strip the part number off of a BODY section identifier.  The part number 
144         * is a series of "." separated tokens.  So "BODY[3.2.1]" would be the BODY for 
145         * section 3.2.1 of a multipart message.  The section may also have a qualifier
146         * name on the end.  "BODY[3.2.1.HEADER}" would be the HEADERS for that 
147         * body section.  The return value is the name of the section, which can 
148         * be a "" or the the section qualifier (e.g., "HEADER"). 
149         * 
150         * @param name   The section name.
151         * 
152         * @return The remainder of the section name after the numeric part number has 
153         *         been removed.
154         */
155        private Token parsePartNumber(Token token, IMAPResponseTokenizer source) throws MessagingException {
156            StringBuffer part = new StringBuffer(token.getValue()); 
157            // NB:  We're still parsing with the expanded delimiter set 
158            token = source.next(false, true); 
159            
160            while (true) {
161                // Not a period?  We've reached the end of the section number, 
162                // finalize the part number and let the caller figure out what 
163                // to do from here.  
164                if (!token.isType('.')) {
165                    partNumber = part.toString(); 
166                    return token; 
167                }
168                // might have another number section 
169                else {
170                    // step to the next token 
171                    token = source.next(false, true); 
172                    // another section number piece?
173                    if (token.isType(Token.NUMERIC)) {
174                        // add this to the collection, and continue 
175                        part.append('.'); 
176                        part.append(token.getValue()); 
177                        token = source.next(false, true); 
178                    }
179                    else  {
180                        partNumber = part.toString(); 
181                        // this is likely the start of the section name 
182                        return token; 
183                    }
184                }
185            }
186        }
187        
188        
189        /**
190         * Parse the section name, if any, in a BODY section qualifier.  The 
191         * section name may stand alone within the body section (e.g., 
192         * "BODY[HEADERS]" or follow the section number (e.g., 
193         * "BODY[1.2.3.HEADERS.FIELDS.NOT]".  
194         * 
195         * @param token  The first token of the name sequence.
196         * @param source The source tokenizer.
197         * 
198         * @return The first non-name token in the response. 
199         */
200        private Token parseSectionName(Token token, IMAPResponseTokenizer source) throws MessagingException {
201            StringBuffer part = new StringBuffer(token.getValue()); 
202            // NB:  We're still parsing with the expanded delimiter set 
203            token = source.next(false, true); 
204            
205            while (true) {
206                // Not a period?  We've reached the end of the section number, 
207                // finalize the part number and let the caller figure out what 
208                // to do from here.  
209                if (!token.isType('.')) {
210                    sectionName = part.toString(); 
211                    return token; 
212                }
213                // might have another number section 
214                else {
215                    // add this to the collection, and continue 
216                    part.append('.'); 
217                    part.append(source.readString()); 
218                    token = source.next(false, true); 
219                }
220            }
221        }
222        
223        
224        /**
225         * Parse a header list that may follow the HEADER.FIELD or HEADER.FIELD.NOT
226         * name qualifier.  This is a list of string values enclosed in parens.
227         * 
228         * @param source The source tokenizer.
229         * 
230         * @return The next token in the response (which should be the section terminator, ']')
231         * @exception MessagingException
232         */
233        private Token parseHeaderList(IMAPResponseTokenizer source) throws MessagingException {
234            headers = new ArrayList();
235            
236            // normal parsing rules going on here 
237            while (source.notListEnd()) {
238                String value = source.readString();
239                headers.add(value);
240            }
241            // step over the closing paren 
242            source.next(); 
243            // NB, back to the expanded token rules again 
244            return source.next(false, true); 
245        }
246        
247        
248        /**
249         * Parse off the substring values following the section identifier, if 
250         * any.  If present, they will be in the format "<start.len>".  
251         * 
252         * @param source The source tokenizer.
253         * 
254         * @exception MessagingException
255         */
256        private void parseSubstringValues(IMAPResponseTokenizer source) throws MessagingException {
257            // We rarely have one of these, so it's a quick out 
258            if (!source.peek(false, true).isType('<')) {
259                return; 
260            }
261            // step over the angle bracket. 
262            source.next(false, true); 
263            // pull out the start information 
264            start = source.next(false, true).getInteger(); 
265            // step over the period 
266            source.next(false, true);         
267            // now the length bit                  
268            length = source.next(false, true).getInteger(); 
269            // and consume the closing angle bracket 
270            source.next(false, true); 
271        }
272    }
273