001/*
002 * Copyright 2007-2016 UnboundID Corp.
003 * All Rights Reserved.
004 */
005/*
006 * Copyright (C) 2008-2016 UnboundID Corp.
007 *
008 * This program is free software; you can redistribute it and/or modify
009 * it under the terms of the GNU General Public License (GPLv2 only)
010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only)
011 * as published by the Free Software Foundation.
012 *
013 * This program is distributed in the hope that it will be useful,
014 * but WITHOUT ANY WARRANTY; without even the implied warranty of
015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
016 * GNU General Public License for more details.
017 *
018 * You should have received a copy of the GNU General Public License
019 * along with this program; if not, see <http://www.gnu.org/licenses>.
020 */
021package com.unboundid.ldif;
022
023
024
025import java.io.BufferedReader;
026import java.io.BufferedWriter;
027import java.io.File;
028import java.io.FileInputStream;
029import java.io.FileWriter;
030import java.io.InputStream;
031import java.io.InputStreamReader;
032import java.io.IOException;
033import java.text.ParseException;
034import java.util.ArrayList;
035import java.util.Collection;
036import java.util.Iterator;
037import java.util.HashSet;
038import java.util.LinkedHashMap;
039import java.util.List;
040import java.util.Set;
041import java.util.concurrent.BlockingQueue;
042import java.util.concurrent.ArrayBlockingQueue;
043import java.util.concurrent.TimeUnit;
044import java.util.concurrent.atomic.AtomicBoolean;
045import java.nio.charset.Charset;
046
047import com.unboundid.asn1.ASN1OctetString;
048import com.unboundid.ldap.matchingrules.CaseIgnoreStringMatchingRule;
049import com.unboundid.ldap.matchingrules.MatchingRule;
050import com.unboundid.ldap.sdk.Attribute;
051import com.unboundid.ldap.sdk.Control;
052import com.unboundid.ldap.sdk.Entry;
053import com.unboundid.ldap.sdk.Modification;
054import com.unboundid.ldap.sdk.ModificationType;
055import com.unboundid.ldap.sdk.LDAPException;
056import com.unboundid.ldap.sdk.schema.AttributeTypeDefinition;
057import com.unboundid.ldap.sdk.schema.Schema;
058import com.unboundid.util.AggregateInputStream;
059import com.unboundid.util.Base64;
060import com.unboundid.util.LDAPSDKThreadFactory;
061import com.unboundid.util.ThreadSafety;
062import com.unboundid.util.ThreadSafetyLevel;
063import com.unboundid.util.parallel.AsynchronousParallelProcessor;
064import com.unboundid.util.parallel.Result;
065import com.unboundid.util.parallel.ParallelProcessor;
066import com.unboundid.util.parallel.Processor;
067
068import static com.unboundid.ldif.LDIFMessages.*;
069import static com.unboundid.util.Debug.*;
070import static com.unboundid.util.StaticUtils.*;
071import static com.unboundid.util.Validator.*;
072
073/**
074 * This class provides an LDIF reader, which can be used to read and decode
075 * entries and change records from a data source using the LDAP Data Interchange
076 * Format as per <A HREF="http://www.ietf.org/rfc/rfc2849.txt">RFC 2849</A>.
077 * <BR>
078 * This class is not synchronized.  If multiple threads read from the
079 * LDIFReader, they must be synchronized externally.
080 * <BR><BR>
081 * <H2>Example</H2>
082 * The following example iterates through all entries contained in an LDIF file
083 * and attempts to add them to a directory server:
084 * <PRE>
085 * LDIFReader ldifReader = new LDIFReader(pathToLDIFFile);
086 *
087 * int entriesRead = 0;
088 * int entriesAdded = 0;
089 * int errorsEncountered = 0;
090 * while (true)
091 * {
092 *   Entry entry;
093 *   try
094 *   {
095 *     entry = ldifReader.readEntry();
096 *     if (entry == null)
097 *     {
098 *       // All entries have been read.
099 *       break;
100 *     }
101 *
102 *     entriesRead++;
103 *   }
104 *   catch (LDIFException le)
105 *   {
106 *     errorsEncountered++;
107 *     if (le.mayContinueReading())
108 *     {
 *       // A recoverable error occurred while attempting to read an entry,
 *       // at or near line number le.getLineNumber()
111 *       // The entry will be skipped, but we'll try to keep reading from the
112 *       // LDIF file.
113 *       continue;
114 *     }
115 *     else
116 *     {
117 *       // An unrecoverable error occurred while attempting to read an entry
118 *       // at or near line number le.getLineNumber()
119 *       // No further LDIF processing will be performed.
120 *       break;
121 *     }
122 *   }
123 *   catch (IOException ioe)
124 *   {
125 *     // An I/O error occurred while attempting to read from the LDIF file.
126 *     // No further LDIF processing will be performed.
127 *     errorsEncountered++;
128 *     break;
129 *   }
130 *
131 *   LDAPResult addResult;
132 *   try
133 *   {
134 *     addResult = connection.add(entry);
135 *     // If we got here, then the change should have been processed
136 *     // successfully.
137 *     entriesAdded++;
138 *   }
139 *   catch (LDAPException le)
140 *   {
141 *     // If we got here, then the change attempt failed.
142 *     addResult = le.toLDAPResult();
143 *     errorsEncountered++;
144 *   }
145 * }
146 *
147 * ldifReader.close();
148 * </PRE>
149 */
150@ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE)
151public final class LDIFReader
152{
153  /**
154   * The default buffer size (128KB) that will be used when reading from the
155   * data source.
156   */
157  public static final int DEFAULT_BUFFER_SIZE = 128 * 1024;
158
159
160
161  /*
162   * When processing asynchronously, this determines how many of the allocated
163   * worker threads are used to parse each batch of read entries.
164   */
165  private static final int ASYNC_MIN_PER_PARSING_THREAD = 3;
166
167
168
169  /**
170   * When processing asynchronously, this specifies the size of the pending and
171   * completed queues.
172   */
173  private static final int ASYNC_QUEUE_SIZE = 500;
174
175
176
177  /**
178   * Special entry used internally to signal that the LDIFReaderEntryTranslator
179   * has signalled that a read Entry should be skipped by returning null,
180   * which normally implies EOF.
181   */
182  private static final Entry SKIP_ENTRY = new Entry("cn=skipped");
183
184
185
186  /**
187   * The default base path that will be prepended to relative paths.  It will
188   * end with a trailing slash.
189   */
190  private static final String DEFAULT_RELATIVE_BASE_PATH;
191  static
192  {
193    final File currentDir;
194    String currentDirString = System.getProperty("user.dir");
195    if (currentDirString == null)
196    {
197      currentDir = new File(".");
198    }
199    else
200    {
201      currentDir = new File(currentDirString);
202    }
203
204    final String currentDirAbsolutePath = currentDir.getAbsolutePath();
205    if (currentDirAbsolutePath.endsWith(File.separator))
206    {
207      DEFAULT_RELATIVE_BASE_PATH = currentDirAbsolutePath;
208    }
209    else
210    {
211      DEFAULT_RELATIVE_BASE_PATH = currentDirAbsolutePath + File.separator;
212    }
213  }
214
215
216
217  // The buffered reader that will be used to read LDIF data.
218  private final BufferedReader reader;
219
220  // The behavior that should be exhibited when encountering duplicate attribute
221  // values.
222  private volatile DuplicateValueBehavior duplicateValueBehavior;
223
224  // A line number counter.
225  private long lineNumberCounter = 0;
226
227  // The change record translator to use, if any.
228  private final LDIFReaderChangeRecordTranslator changeRecordTranslator;
229
230  // The entry translator to use, if any.
231  private final LDIFReaderEntryTranslator entryTranslator;
232
233  // The schema that will be used when processing, if applicable.
234  private Schema schema;
235
236  // Specifies the base path that will be prepended to relative paths for file
237  // URLs.
238  private volatile String relativeBasePath;
239
240  // The behavior that should be exhibited with regard to illegal trailing
241  // spaces in attribute values.
242  private volatile TrailingSpaceBehavior trailingSpaceBehavior;
243
244  // True iff we are processing asynchronously.
245  private final boolean isAsync;
246
247  //
248  // The following only apply to asynchronous processing.
249  //
250
251  // Parses entries asynchronously.
252  private final AsynchronousParallelProcessor<UnparsedLDIFRecord, LDIFRecord>
253       asyncParser;
254
255  // Set to true when the end of the input is reached.
256  private final AtomicBoolean asyncParsingComplete;
257
258  // The records that have been read and parsed.
259  private final BlockingQueue<Result<UnparsedLDIFRecord, LDIFRecord>>
260       asyncParsedRecords;
261
262
263
264  /**
265   * Creates a new LDIF reader that will read data from the specified file.
266   *
267   * @param  path  The path to the file from which the data is to be read.  It
268   *               must not be {@code null}.
269   *
270   * @throws  IOException  If a problem occurs while opening the file for
271   *                       reading.
272   */
273  public LDIFReader(final String path)
274         throws IOException
275  {
276    this(new FileInputStream(path));
277  }
278
279
280
281  /**
282   * Creates a new LDIF reader that will read data from the specified file
283   * and parses the LDIF records asynchronously using the specified number of
284   * threads.
285   *
286   * @param  path  The path to the file from which the data is to be read.  It
287   *               must not be {@code null}.
288   * @param  numParseThreads  If this value is greater than zero, then the
289   *                          specified number of threads will be used to
290   *                          asynchronously read and parse the LDIF file.
291   *
292   * @throws  IOException  If a problem occurs while opening the file for
293   *                       reading.
294   *
295   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
296   *      constructor for more details about asynchronous processing.
297   */
298  public LDIFReader(final String path, final int numParseThreads)
299         throws IOException
300  {
301    this(new FileInputStream(path), numParseThreads);
302  }
303
304
305
306  /**
307   * Creates a new LDIF reader that will read data from the specified file.
308   *
309   * @param  file  The file from which the data is to be read.  It must not be
310   *               {@code null}.
311   *
312   * @throws  IOException  If a problem occurs while opening the file for
313   *                       reading.
314   */
315  public LDIFReader(final File file)
316         throws IOException
317  {
318    this(new FileInputStream(file));
319  }
320
321
322
323  /**
324   * Creates a new LDIF reader that will read data from the specified file
325   * and optionally parses the LDIF records asynchronously using the specified
326   * number of threads.
327   *
328   * @param  file             The file from which the data is to be read.  It
329   *                          must not be {@code null}.
330   * @param  numParseThreads  If this value is greater than zero, then the
331   *                          specified number of threads will be used to
332   *                          asynchronously read and parse the LDIF file.
333   *
334   * @throws  IOException  If a problem occurs while opening the file for
335   *                       reading.
336   */
337  public LDIFReader(final File file, final int numParseThreads)
338         throws IOException
339  {
340    this(new FileInputStream(file), numParseThreads);
341  }
342
343
344
345  /**
346   * Creates a new LDIF reader that will read data from the specified files in
347   * the order in which they are provided and optionally parses the LDIF records
348   * asynchronously using the specified number of threads.
349   *
350   * @param  files            The files from which the data is to be read.  It
351   *                          must not be {@code null} or empty.
352   * @param  numParseThreads  If this value is greater than zero, then the
353   *                          specified number of threads will be used to
354   *                          asynchronously read and parse the LDIF file.
355   * @param entryTranslator   The LDIFReaderEntryTranslator to apply to entries
356   *                          before they are returned.  This is normally
357   *                          {@code null}, which causes entries to be returned
358   *                          unaltered. This is particularly useful when
359   *                          parsing the input file in parallel because the
360   *                          entry translation is also done in parallel.
361   *
362   * @throws  IOException  If a problem occurs while opening the file for
363   *                       reading.
364   */
365  public LDIFReader(final File[] files, final int numParseThreads,
366                    final LDIFReaderEntryTranslator entryTranslator)
367         throws IOException
368  {
369    this(files, numParseThreads, entryTranslator, null);
370  }
371
372
373
374  /**
375   * Creates a new LDIF reader that will read data from the specified files in
376   * the order in which they are provided and optionally parses the LDIF records
377   * asynchronously using the specified number of threads.
378   *
379   * @param  files                   The files from which the data is to be
380   *                                 read.  It must not be {@code null} or
381   *                                 empty.
382   * @param  numParseThreads         If this value is greater than zero, then
383   *                                 the specified number of threads will be
384   *                                 used to asynchronously read and parse the
385   *                                 LDIF file.
386   * @param  entryTranslator         The LDIFReaderEntryTranslator to apply to
387   *                                 entries before they are returned.  This is
388   *                                 normally {@code null}, which causes entries
389   *                                 to be returned unaltered.  This is
390   *                                 particularly useful when parsing the input
391   *                                 file in parallel because the entry
392   *                                 translation is also done in parallel.
393   * @param  changeRecordTranslator  The LDIFReaderChangeRecordTranslator to
394   *                                 apply to change records before they are
395   *                                 returned.  This is normally {@code null},
396   *                                 which causes change records to be returned
397   *                                 unaltered.  This is particularly useful
398   *                                 when parsing the input file in parallel
399   *                                 because the change record translation is
400   *                                 also done in parallel.
401   *
402   * @throws  IOException  If a problem occurs while opening the file for
403   *                       reading.
404   */
405  public LDIFReader(final File[] files, final int numParseThreads,
406              final LDIFReaderEntryTranslator entryTranslator,
407              final LDIFReaderChangeRecordTranslator changeRecordTranslator)
408         throws IOException
409  {
410    this(createAggregateInputStream(files), numParseThreads, entryTranslator,
411         changeRecordTranslator);
412  }
413
414
415
416  /**
417   * Creates a new aggregate input stream that will read data from the specified
418   * files.  If there are multiple files, then a "padding" file will be inserted
419   * between them to ensure that there is at least one blank line between the
420   * end of one file and the beginning of another.
421   *
422   * @param  files  The files from which the data is to be read.  It must not be
423   *                {@code null} or empty.
424   *
425   * @return  The input stream to use to read data from the provided files.
426   *
427   * @throws  IOException  If a problem is encountered while attempting to
428   *                       create the input stream.
429   */
430  private static InputStream createAggregateInputStream(final File... files)
431          throws IOException
432  {
433    if (files.length == 0)
434    {
435      throw new IOException(ERR_READ_NO_LDIF_FILES.get());
436    }
437    else if (files.length == 1)
438    {
439      return new FileInputStream(files[0]);
440    }
441    else
442    {
443      final File spacerFile =
444           File.createTempFile("ldif-reader-spacer", ".ldif");
445      spacerFile.deleteOnExit();
446
447      final BufferedWriter spacerWriter =
448           new BufferedWriter(new FileWriter(spacerFile));
449      try
450      {
451        spacerWriter.newLine();
452        spacerWriter.newLine();
453      }
454      finally
455      {
456        spacerWriter.close();
457      }
458
459      final File[] returnArray = new File[(files.length * 2) - 1];
460      returnArray[0] = files[0];
461
462      int pos = 1;
463      for (int i=1; i < files.length; i++)
464      {
465        returnArray[pos++] = spacerFile;
466        returnArray[pos++] = files[i];
467      }
468
469      return new AggregateInputStream(returnArray);
470    }
471  }
472
473
474
475  /**
476   * Creates a new LDIF reader that will read data from the provided input
477   * stream.
478   *
479   * @param  inputStream  The input stream from which the data is to be read.
480   *                      It must not be {@code null}.
481   */
482  public LDIFReader(final InputStream inputStream)
483  {
484    this(inputStream, 0);
485  }
486
487
488
489  /**
490   * Creates a new LDIF reader that will read data from the specified stream
491   * and parses the LDIF records asynchronously using the specified number of
492   * threads.
493   *
494   * @param  inputStream  The input stream from which the data is to be read.
495   *                      It must not be {@code null}.
496   * @param  numParseThreads  If this value is greater than zero, then the
497   *                          specified number of threads will be used to
498   *                          asynchronously read and parse the LDIF file.
499   *
500   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
501   *      constructor for more details about asynchronous processing.
502   */
503  public LDIFReader(final InputStream inputStream, final int numParseThreads)
504  {
505    // UTF-8 is required by RFC 2849.  Java guarantees it's always available.
506    this(new BufferedReader(new InputStreamReader(inputStream,
507                                                  Charset.forName("UTF-8")),
508                            DEFAULT_BUFFER_SIZE),
509         numParseThreads);
510  }
511
512
513
514  /**
515   * Creates a new LDIF reader that will read data from the specified stream
516   * and parses the LDIF records asynchronously using the specified number of
517   * threads.
518   *
519   * @param  inputStream  The input stream from which the data is to be read.
520   *                      It must not be {@code null}.
521   * @param  numParseThreads  If this value is greater than zero, then the
522   *                          specified number of threads will be used to
523   *                          asynchronously read and parse the LDIF file.
524   * @param entryTranslator  The LDIFReaderEntryTranslator to apply to read
525   *                         entries before they are returned.  This is normally
526   *                         {@code null}, which causes entries to be returned
527   *                         unaltered. This is particularly useful when parsing
528   *                         the input file in parallel because the entry
529   *                         translation is also done in parallel.
530   *
531   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
532   *      constructor for more details about asynchronous processing.
533   */
534  public LDIFReader(final InputStream inputStream, final int numParseThreads,
535                    final LDIFReaderEntryTranslator entryTranslator)
536  {
537    this(inputStream, numParseThreads, entryTranslator, null);
538  }
539
540
541
542  /**
543   * Creates a new LDIF reader that will read data from the specified stream
544   * and parses the LDIF records asynchronously using the specified number of
545   * threads.
546   *
547   * @param  inputStream             The input stream from which the data is to
548   *                                 be read.  It must not be {@code null}.
549   * @param  numParseThreads         If this value is greater than zero, then
550   *                                 the specified number of threads will be
551   *                                 used to asynchronously read and parse the
552   *                                 LDIF file.
553   * @param  entryTranslator         The LDIFReaderEntryTranslator to apply to
554   *                                 entries before they are returned.  This is
555   *                                 normally {@code null}, which causes entries
556   *                                 to be returned unaltered.  This is
557   *                                 particularly useful when parsing the input
558   *                                 file in parallel because the entry
559   *                                 translation is also done in parallel.
560   * @param  changeRecordTranslator  The LDIFReaderChangeRecordTranslator to
561   *                                 apply to change records before they are
562   *                                 returned.  This is normally {@code null},
563   *                                 which causes change records to be returned
564   *                                 unaltered.  This is particularly useful
565   *                                 when parsing the input file in parallel
566   *                                 because the change record translation is
567   *                                 also done in parallel.
568   *
569   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
570   *      constructor for more details about asynchronous processing.
571   */
572  public LDIFReader(final InputStream inputStream, final int numParseThreads,
573              final LDIFReaderEntryTranslator entryTranslator,
574              final LDIFReaderChangeRecordTranslator changeRecordTranslator)
575  {
576    // UTF-8 is required by RFC 2849.  Java guarantees it's always available.
577    this(new BufferedReader(
578              new InputStreamReader(inputStream, Charset.forName("UTF-8")),
579              DEFAULT_BUFFER_SIZE),
580         numParseThreads, entryTranslator, changeRecordTranslator);
581  }
582
583
584
585  /**
586   * Creates a new LDIF reader that will use the provided buffered reader to
587   * read the LDIF data.  The encoding of the underlying Reader must be set to
588   * "UTF-8" as required by RFC 2849.
589   *
590   * @param  reader  The buffered reader that will be used to read the LDIF
591   *                 data.  It must not be {@code null}.
592   */
593  public LDIFReader(final BufferedReader reader)
594  {
595    this(reader, 0);
596  }
597
598
599
600  /**
601   * Creates a new LDIF reader that will read data from the specified buffered
602   * reader and parses the LDIF records asynchronously using the specified
603   * number of threads.  The encoding of the underlying Reader must be set to
604   * "UTF-8" as required by RFC 2849.
605   *
606   * @param reader The buffered reader that will be used to read the LDIF data.
607   *               It must not be {@code null}.
608   * @param  numParseThreads  If this value is greater than zero, then the
609   *                          specified number of threads will be used to
610   *                          asynchronously read and parse the LDIF file.
611   *
612   * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator)
613   *      constructor for more details about asynchronous processing.
614   */
615  public LDIFReader(final BufferedReader reader, final int numParseThreads)
616  {
617    this(reader, numParseThreads, null);
618  }
619
620
621
622  /**
623   * Creates a new LDIF reader that will read data from the specified buffered
624   * reader and parses the LDIF records asynchronously using the specified
625   * number of threads.  The encoding of the underlying Reader must be set to
626   * "UTF-8" as required by RFC 2849.
627   *
628   * @param reader The buffered reader that will be used to read the LDIF data.
629   *               It must not be {@code null}.
630   * @param  numParseThreads  If this value is greater than zero, then the
631   *                          specified number of threads will be used to
632   *                          asynchronously read and parse the LDIF file.
633   *                          This should only be set to greater than zero when
634   *                          performance analysis has demonstrated that reading
635   *                          and parsing the LDIF is a bottleneck.  The default
636   *                          synchronous processing is normally fast enough.
637   *                          There is little benefit in passing in a value
638   *                          greater than four (unless there is an
639   *                          LDIFReaderEntryTranslator that does time-consuming
640   *                          processing).  A value of zero implies the
641   *                          default behavior of reading and parsing LDIF
642   *                          records synchronously when one of the read
643   *                          methods is called.
644   * @param entryTranslator  The LDIFReaderEntryTranslator to apply to read
645   *                         entries before they are returned.  This is normally
646   *                         {@code null}, which causes entries to be returned
647   *                         unaltered. This is particularly useful when parsing
648   *                         the input file in parallel because the entry
649   *                         translation is also done in parallel.
650   */
651  public LDIFReader(final BufferedReader reader,
652                    final int numParseThreads,
653                    final LDIFReaderEntryTranslator entryTranslator)
654  {
655    this(reader, numParseThreads, entryTranslator, null);
656  }
657
658
659
660  /**
661   * Creates a new LDIF reader that will read data from the specified buffered
662   * reader and parses the LDIF records asynchronously using the specified
663   * number of threads.  The encoding of the underlying Reader must be set to
664   * "UTF-8" as required by RFC 2849.
665   *
666   * @param reader                   The buffered reader that will be used to
667   *                                 read the LDIF data.  It must not be
668   *                                 {@code null}.
669   * @param  numParseThreads         If this value is greater than zero, then
670   *                                 the specified number of threads will be
671   *                                 used to asynchronously read and parse the
672   *                                 LDIF file.
673   * @param  entryTranslator         The LDIFReaderEntryTranslator to apply to
674   *                                 entries before they are returned.  This is
675   *                                 normally {@code null}, which causes entries
676   *                                 to be returned unaltered.  This is
677   *                                 particularly useful when parsing the input
678   *                                 file in parallel because the entry
679   *                                 translation is also done in parallel.
680   * @param  changeRecordTranslator  The LDIFReaderChangeRecordTranslator to
681   *                                 apply to change records before they are
682   *                                 returned.  This is normally {@code null},
683   *                                 which causes change records to be returned
684   *                                 unaltered.  This is particularly useful
685   *                                 when parsing the input file in parallel
686   *                                 because the change record translation is
687   *                                 also done in parallel.
688   */
689  public LDIFReader(final BufferedReader reader, final int numParseThreads,
690              final LDIFReaderEntryTranslator entryTranslator,
691              final LDIFReaderChangeRecordTranslator changeRecordTranslator)
692  {
693    ensureNotNull(reader);
694    ensureTrue(numParseThreads >= 0,
695               "LDIFReader.numParseThreads must not be negative.");
696
697    this.reader = reader;
698    this.entryTranslator = entryTranslator;
699    this.changeRecordTranslator = changeRecordTranslator;
700
701    duplicateValueBehavior = DuplicateValueBehavior.STRIP;
702    trailingSpaceBehavior  = TrailingSpaceBehavior.REJECT;
703
704    relativeBasePath = DEFAULT_RELATIVE_BASE_PATH;
705
706    if (numParseThreads == 0)
707    {
708      isAsync = false;
709      asyncParser = null;
710      asyncParsingComplete = null;
711      asyncParsedRecords = null;
712    }
713    else
714    {
715      isAsync = true;
716      asyncParsingComplete = new AtomicBoolean(false);
717
718      // Decodes entries in parallel.
719      final LDAPSDKThreadFactory threadFactory =
720           new LDAPSDKThreadFactory("LDIFReader Worker", true, null);
721      final ParallelProcessor<UnparsedLDIFRecord, LDIFRecord> parallelParser =
722           new ParallelProcessor<UnparsedLDIFRecord, LDIFRecord>(
723                new RecordParser(), threadFactory, numParseThreads,
724                ASYNC_MIN_PER_PARSING_THREAD);
725
726      final BlockingQueue<UnparsedLDIFRecord> pendingQueue = new
727           ArrayBlockingQueue<UnparsedLDIFRecord>(ASYNC_QUEUE_SIZE);
728
729      // The output queue must be a little more than twice as big as the input
730      // queue to more easily handle being shutdown in the middle of processing
731      // when the queues are full and threads are blocked.
732      asyncParsedRecords = new ArrayBlockingQueue
733           <Result<UnparsedLDIFRecord, LDIFRecord>>(2 * ASYNC_QUEUE_SIZE + 100);
734
735      asyncParser = new AsynchronousParallelProcessor
736           <UnparsedLDIFRecord, LDIFRecord>(pendingQueue, parallelParser,
737                                            asyncParsedRecords);
738
739      final LineReaderThread lineReaderThread = new LineReaderThread();
740      lineReaderThread.start();
741    }
742  }
743
744
745
746  /**
747   * Reads entries from the LDIF file with the specified path and returns them
748   * as a {@code List}.  This is a convenience method that should only be used
749   * for data sets that are small enough so that running out of memory isn't a
750   * concern.
751   *
752   * @param  path  The path to the LDIF file containing the entries to be read.
753   *
754   * @return  A list of the entries read from the given LDIF file.
755   *
756   * @throws  IOException  If a problem occurs while attempting to read data
757   *                       from the specified file.
758   *
759   * @throws  LDIFException  If a problem is encountered while attempting to
760   *                         decode data read as LDIF.
761   */
762  public static List<Entry> readEntries(final String path)
763         throws IOException, LDIFException
764  {
765    return readEntries(new LDIFReader(path));
766  }
767
768
769
770  /**
771   * Reads entries from the specified LDIF file and returns them as a
772   * {@code List}.  This is a convenience method that should only be used for
773   * data sets that are small enough so that running out of memory isn't a
774   * concern.
775   *
776   * @param  file  A reference to the LDIF file containing the entries to be
777   *               read.
778   *
779   * @return  A list of the entries read from the given LDIF file.
780   *
781   * @throws  IOException  If a problem occurs while attempting to read data
782   *                       from the specified file.
783   *
784   * @throws  LDIFException  If a problem is encountered while attempting to
785   *                         decode data read as LDIF.
786   */
787  public static List<Entry> readEntries(final File file)
788         throws IOException, LDIFException
789  {
790    return readEntries(new LDIFReader(file));
791  }
792
793
794
795  /**
796   * Reads and decodes LDIF entries from the provided input stream and
797   * returns them as a {@code List}.  This is a convenience method that should
798   * only be used for data sets that are small enough so that running out of
799   * memory isn't a concern.
800   *
801   * @param  inputStream  The input stream from which the entries should be
802   *                      read.  The input stream will be closed before
803   *                      returning.
804   *
805   * @return  A list of the entries read from the given input stream.
806   *
807   * @throws  IOException  If a problem occurs while attempting to read data
808   *                       from the input stream.
809   *
810   * @throws  LDIFException  If a problem is encountered while attempting to
811   *                         decode data read as LDIF.
812   */
813  public static List<Entry> readEntries(final InputStream inputStream)
814         throws IOException, LDIFException
815  {
816    return readEntries(new LDIFReader(inputStream));
817  }
818
819
820
821  /**
822   * Reads entries from the provided LDIF reader and returns them as a list.
823   *
824   * @param  reader  The reader from which the entries should be read.  It will
825   *                 be closed before returning.
826   *
827   * @return  A list of the entries read from the provided reader.
828   *
829   * @throws  IOException  If a problem was encountered while attempting to read
830   *                       data from the LDIF data source.
831   *
832   * @throws  LDIFException  If a problem is encountered while attempting to
833   *                         decode data read as LDIF.
834   */
835  private static List<Entry> readEntries(final LDIFReader reader)
836          throws IOException, LDIFException
837  {
838    try
839    {
840      final ArrayList<Entry> entries = new ArrayList<Entry>(10);
841      while (true)
842      {
843        final Entry e = reader.readEntry();
844        if (e == null)
845        {
846          break;
847        }
848
849        entries.add(e);
850      }
851
852      return entries;
853    }
854    finally
855    {
856      reader.close();
857    }
858  }
859
860
861
862  /**
863   * Closes this LDIF reader and the underlying LDIF source.
864   *
865   * @throws  IOException  If a problem occurs while closing the underlying LDIF
866   *                       source.
867   */
868  public void close()
869         throws IOException
870  {
871    reader.close();
872
873    if (isAsync())
874    {
875      // Closing the reader will trigger the LineReaderThread to complete, but
876      // not if it's blocked submitting the next UnparsedLDIFRecord.  To avoid
877      // this, we clear out the completed output queue, which is larger than
878      // the input queue, so the LineReaderThread will stop reading and
879      // shutdown the asyncParser.
880      asyncParsedRecords.clear();
881    }
882  }
883
884
885
886  /**
887   * Indicates whether to ignore any duplicate values encountered while reading
888   * LDIF records.
889   *
890   * @return  {@code true} if duplicate values should be ignored, or
891   *          {@code false} if any LDIF records containing duplicate values
892   *          should be rejected.
893   *
894   * @deprecated  Use the {@link #getDuplicateValueBehavior} method instead.
895   */
896  @Deprecated()
897  public boolean ignoreDuplicateValues()
898  {
899    return (duplicateValueBehavior == DuplicateValueBehavior.STRIP);
900  }
901
902
903
904  /**
905   * Specifies whether to ignore any duplicate values encountered while reading
906   * LDIF records.
907   *
908   * @param  ignoreDuplicateValues  Indicates whether to ignore duplicate
909   *                                attribute values encountered while reading
910   *                                LDIF records.
911   *
912   * @deprecated  Use the {@link #setDuplicateValueBehavior} method instead.
913   */
914  @Deprecated()
915  public void setIgnoreDuplicateValues(final boolean ignoreDuplicateValues)
916  {
917    if (ignoreDuplicateValues)
918    {
919      duplicateValueBehavior = DuplicateValueBehavior.STRIP;
920    }
921    else
922    {
923      duplicateValueBehavior = DuplicateValueBehavior.REJECT;
924    }
925  }
926
927
928
929  /**
930   * Retrieves the behavior that should be exhibited if the LDIF reader
931   * encounters an entry with duplicate values.
932   *
933   * @return  The behavior that should be exhibited if the LDIF reader
934   *          encounters an entry with duplicate values.
935   */
936  public DuplicateValueBehavior getDuplicateValueBehavior()
937  {
938    return duplicateValueBehavior;
939  }
940
941
942
943  /**
944   * Specifies the behavior that should be exhibited if the LDIF reader
945   * encounters an entry with duplicate values.
946   *
947   * @param  duplicateValueBehavior  The behavior that should be exhibited if
948   *                                 the LDIF reader encounters an entry with
949   *                                 duplicate values.
950   */
951  public void setDuplicateValueBehavior(
952                   final DuplicateValueBehavior duplicateValueBehavior)
953  {
954    this.duplicateValueBehavior = duplicateValueBehavior;
955  }
956
957
958
959  /**
960   * Indicates whether to strip off any illegal trailing spaces that may appear
961   * in LDIF records (e.g., after an entry DN or attribute value).  The LDIF
962   * specification strongly recommends that any value which legitimately
963   * contains trailing spaces be base64-encoded, and any spaces which appear
964   * after the end of non-base64-encoded values may therefore be considered
965   * invalid.  If any such trailing spaces are encountered in an LDIF record and
966   * they are not to be stripped, then an {@link LDIFException} will be thrown
967   * for that record.
968   * <BR><BR>
969   * Note that this applies only to spaces after the end of a value, and not to
970   * spaces which may appear at the end of a line for a value that is wrapped
971   * and continued on the next line.
972   *
973   * @return  {@code true} if illegal trailing spaces should be stripped off, or
974   *          {@code false} if LDIF records containing illegal trailing spaces
975   *          should be rejected.
976   *
977   * @deprecated  Use the {@link #getTrailingSpaceBehavior} method instead.
978   */
979  @Deprecated()
980  public boolean stripTrailingSpaces()
981  {
982    return (trailingSpaceBehavior == TrailingSpaceBehavior.STRIP);
983  }
984
985
986
987  /**
988   * Specifies whether to strip off any illegal trailing spaces that may appear
989   * in LDIF records (e.g., after an entry DN or attribute value).  The LDIF
990   * specification strongly recommends that any value which legitimately
991   * contains trailing spaces be base64-encoded, and any spaces which appear
992   * after the end of non-base64-encoded values may therefore be considered
993   * invalid.  If any such trailing spaces are encountered in an LDIF record and
994   * they are not to be stripped, then an {@link LDIFException} will be thrown
995   * for that record.
996   * <BR><BR>
997   * Note that this applies only to spaces after the end of a value, and not to
998   * spaces which may appear at the end of a line for a value that is wrapped
999   * and continued on the next line.
1000   *
1001   * @param  stripTrailingSpaces  Indicates whether to strip off any illegal
1002   *                              trailing spaces, or {@code false} if LDIF
1003   *                              records containing them should be rejected.
1004   *
1005   * @deprecated  Use the {@link #setTrailingSpaceBehavior} method instead.
1006   */
1007  @Deprecated()
1008  public void setStripTrailingSpaces(final boolean stripTrailingSpaces)
1009  {
1010    trailingSpaceBehavior = stripTrailingSpaces
1011         ? TrailingSpaceBehavior.STRIP
1012         : TrailingSpaceBehavior.REJECT;
1013  }
1014
1015
1016
1017  /**
1018   * Retrieves the behavior that should be exhibited when encountering attribute
1019   * values which are not base64-encoded but contain trailing spaces.  The LDIF
1020   * specification strongly recommends that any value which legitimately
1021   * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser
1022   * may be configured to automatically strip these spaces, to preserve them, or
1023   * to reject any entry or change record containing them.
1024   *
1025   * @return  The behavior that should be exhibited when encountering attribute
1026   *          values which are not base64-encoded but contain trailing spaces.
1027   */
1028  public TrailingSpaceBehavior getTrailingSpaceBehavior()
1029  {
1030    return trailingSpaceBehavior;
1031  }
1032
1033
1034
1035  /**
1036   * Specifies the behavior that should be exhibited when encountering attribute
1037   * values which are not base64-encoded but contain trailing spaces.  The LDIF
1038   * specification strongly recommends that any value which legitimately
1039   * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser
1040   * may be configured to automatically strip these spaces, to preserve them, or
1041   * to reject any entry or change record containing them.
1042   *
1043   * @param  trailingSpaceBehavior  The behavior that should be exhibited when
1044   *                                encountering attribute values which are not
1045   *                                base64-encoded but contain trailing spaces.
1046   */
1047  public void setTrailingSpaceBehavior(
1048                   final TrailingSpaceBehavior trailingSpaceBehavior)
1049  {
1050    this.trailingSpaceBehavior = trailingSpaceBehavior;
1051  }
1052
1053
1054
1055  /**
1056   * Retrieves the base path that will be prepended to relative paths in order
1057   * to obtain an absolute path.  This will only be used for "file:" URLs that
1058   * have paths which do not begin with a slash.
1059   *
1060   * @return  The base path that will be prepended to relative paths in order to
1061   *          obtain an absolute path.
1062   */
1063  public String getRelativeBasePath()
1064  {
1065    return relativeBasePath;
1066  }
1067
1068
1069
1070  /**
1071   * Specifies the base path that will be prepended to relative paths in order
1072   * to obtain an absolute path.  This will only be used for "file:" URLs that
1073   * have paths which do not begin with a space.
1074   *
1075   * @param  relativeBasePath  The base path that will be prepended to relative
1076   *                           paths in order to obtain an absolute path.
1077   */
1078  public void setRelativeBasePath(final String relativeBasePath)
1079  {
1080    setRelativeBasePath(new File(relativeBasePath));
1081  }
1082
1083
1084
1085  /**
1086   * Specifies the base path that will be prepended to relative paths in order
1087   * to obtain an absolute path.  This will only be used for "file:" URLs that
1088   * have paths which do not begin with a space.
1089   *
1090   * @param  relativeBasePath  The base path that will be prepended to relative
1091   *                           paths in order to obtain an absolute path.
1092   */
1093  public void setRelativeBasePath(final File relativeBasePath)
1094  {
1095    final String path = relativeBasePath.getAbsolutePath();
1096    if (path.endsWith(File.separator))
1097    {
1098      this.relativeBasePath = path;
1099    }
1100    else
1101    {
1102      this.relativeBasePath = path + File.separator;
1103    }
1104  }
1105
1106
1107
1108  /**
1109   * Retrieves the schema that will be used when reading LDIF records, if
1110   * defined.
1111   *
1112   * @return  The schema that will be used when reading LDIF records, or
1113   *          {@code null} if no schema should be used and all attributes should
1114   *          be treated as case-insensitive strings.
1115   */
1116  public Schema getSchema()
1117  {
1118    return schema;
1119  }
1120
1121
1122
1123  /**
1124   * Specifies the schema that should be used when reading LDIF records.
1125   *
1126   * @param  schema  The schema that should be used when reading LDIF records,
1127   *                 or {@code null} if no schema should be used and all
1128   *                 attributes should be treated as case-insensitive strings.
1129   */
1130  public void setSchema(final Schema schema)
1131  {
1132    this.schema = schema;
1133  }
1134
1135
1136
1137  /**
1138   * Reads a record from the LDIF source.  It may be either an entry or an LDIF
1139   * change record.
1140   *
1141   * @return  The record read from the LDIF source, or {@code null} if there are
1142   *          no more entries to be read.
1143   *
1144   * @throws  IOException  If a problem occurs while trying to read from the
1145   *                       LDIF source.
1146   *
1147   * @throws  LDIFException  If the data read could not be parsed as an entry or
1148   *                         an LDIF change record.
1149   */
1150  public LDIFRecord readLDIFRecord()
1151         throws IOException, LDIFException
1152  {
1153    if (isAsync())
1154    {
1155      return readLDIFRecordAsync();
1156    }
1157    else
1158    {
1159      return readLDIFRecordInternal();
1160    }
1161  }
1162
1163
1164
1165  /**
1166   * Reads an entry from the LDIF source.
1167   *
1168   * @return  The entry read from the LDIF source, or {@code null} if there are
1169   *          no more entries to be read.
1170   *
1171   * @throws  IOException  If a problem occurs while attempting to read from the
1172   *                       LDIF source.
1173   *
1174   * @throws  LDIFException  If the data read could not be parsed as an entry.
1175   */
1176  public Entry readEntry()
1177         throws IOException, LDIFException
1178  {
1179    if (isAsync())
1180    {
1181      return readEntryAsync();
1182    }
1183    else
1184    {
1185      return readEntryInternal();
1186    }
1187  }
1188
1189
1190
1191  /**
1192   * Reads an LDIF change record from the LDIF source.  The LDIF record must
1193   * have a changetype.
1194   *
1195   * @return  The change record read from the LDIF source, or {@code null} if
1196   *          there are no more records to be read.
1197   *
1198   * @throws  IOException  If a problem occurs while attempting to read from the
1199   *                       LDIF source.
1200   *
1201   * @throws  LDIFException  If the data read could not be parsed as an LDIF
1202   *                         change record.
1203   */
1204  public LDIFChangeRecord readChangeRecord()
1205         throws IOException, LDIFException
1206  {
1207    return readChangeRecord(false);
1208  }
1209
1210
1211
1212  /**
1213   * Reads an LDIF change record from the LDIF source.  Optionally, if the LDIF
1214   * record does not have a changetype, then it may be assumed to be an add
1215   * change record.
1216   *
1217   * @param  defaultAdd  Indicates whether an LDIF record not containing a
1218   *                     changetype should be retrieved as an add change record.
1219   *                     If this is {@code false} and the record read does not
1220   *                     include a changetype, then an {@link LDIFException}
1221   *                     will be thrown.
1222   *
1223   * @return  The change record read from the LDIF source, or {@code null} if
1224   *          there are no more records to be read.
1225   *
1226   * @throws  IOException  If a problem occurs while attempting to read from the
1227   *                       LDIF source.
1228   *
1229   * @throws  LDIFException  If the data read could not be parsed as an LDIF
1230   *                         change record.
1231   */
1232  public LDIFChangeRecord readChangeRecord(final boolean defaultAdd)
1233         throws IOException, LDIFException
1234  {
1235    if (isAsync())
1236    {
1237      return readChangeRecordAsync(defaultAdd);
1238    }
1239    else
1240    {
1241      return readChangeRecordInternal(defaultAdd);
1242    }
1243  }
1244
1245
1246
1247  /**
1248   * Reads the next {@code LDIFRecord}, which was read and parsed by a different
1249   * thread.
1250   *
1251   * @return  The next parsed record or {@code null} if there are no more
1252   *          records to read.
1253   *
1254   * @throws IOException  If IOException was thrown when reading or parsing
1255   *                      the record.
1256   *
1257   * @throws LDIFException If LDIFException was thrown parsing the record.
1258   */
1259  private LDIFRecord readLDIFRecordAsync()
1260          throws IOException, LDIFException
1261  {
1262    final Result<UnparsedLDIFRecord, LDIFRecord> result =
1263         readLDIFRecordResultAsync();
1264    if (result == null)
1265    {
1266      return null;
1267    }
1268    else
1269    {
1270      return result.getOutput();
1271    }
1272  }
1273
1274
1275
1276  /**
1277   * Reads an entry asynchronously from the LDIF source.
1278   *
1279   * @return The entry read from the LDIF source, or {@code null} if there are
1280   *         no more entries to be read.
1281   *
1282   * @throws IOException   If a problem occurs while attempting to read from the
1283   *                       LDIF source.
1284   * @throws LDIFException If the data read could not be parsed as an entry.
1285   */
1286  private Entry readEntryAsync()
1287          throws IOException, LDIFException
1288  {
1289    Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1290    LDIFRecord record = null;
1291    while (record == null)
1292    {
1293      result = readLDIFRecordResultAsync();
1294      if (result == null)
1295      {
1296        return null;
1297      }
1298
1299      record = result.getOutput();
1300
1301      // This is a special value that means we should skip this Entry.  We have
1302      // to use something different than null because null means EOF.
1303      if (record == SKIP_ENTRY)
1304      {
1305        record = null;
1306      }
1307    }
1308
1309    if (record instanceof Entry)
1310    {
1311      return (Entry) record;
1312    }
1313    else if (record instanceof LDIFChangeRecord)
1314    {
1315      try
1316      {
1317        // Some LDIFChangeRecord can be converted to an Entry.  This is really
1318        // an edge case though.
1319        return ((LDIFChangeRecord)record).toEntry();
1320      }
1321      catch (LDIFException e)
1322      {
1323        debugException(e);
1324        final long firstLineNumber = result.getInput().getFirstLineNumber();
1325        throw new LDIFException(e.getExceptionMessage(),
1326                                firstLineNumber, true, e);
1327      }
1328    }
1329
1330    throw new AssertionError("LDIFRecords must either be an Entry or an " +
1331                             "LDIFChangeRecord");
1332  }
1333
1334
1335
1336  /**
1337   * Reads an LDIF change record from the LDIF source asynchronously.
1338   * Optionally, if the LDIF record does not have a changetype, then it may be
1339   * assumed to be an add change record.
1340   *
1341   * @param defaultAdd Indicates whether an LDIF record not containing a
1342   *                   changetype should be retrieved as an add change record.
1343   *                   If this is {@code false} and the record read does not
1344   *                   include a changetype, then an {@link LDIFException} will
1345   *                   be thrown.
1346   *
1347   * @return The change record read from the LDIF source, or {@code null} if
1348   *         there are no more records to be read.
1349   *
1350   * @throws IOException   If a problem occurs while attempting to read from the
1351   *                       LDIF source.
1352   * @throws LDIFException If the data read could not be parsed as an LDIF
1353   *                       change record.
1354   */
1355  private LDIFChangeRecord readChangeRecordAsync(final boolean defaultAdd)
1356          throws IOException, LDIFException
1357  {
1358    Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1359    LDIFRecord record = null;
1360    while (record == null)
1361    {
1362      result = readLDIFRecordResultAsync();
1363      if (result == null)
1364      {
1365        return null;
1366      }
1367
1368      record = result.getOutput();
1369
1370      // This is a special value that means we should skip this Entry.  We have
1371      // to use something different than null because null means EOF.
1372      if (record == SKIP_ENTRY)
1373      {
1374        record = null;
1375      }
1376    }
1377
1378    if (record instanceof LDIFChangeRecord)
1379    {
1380      return (LDIFChangeRecord) record;
1381    }
1382    else if (record instanceof Entry)
1383    {
1384      if (defaultAdd)
1385      {
1386        return new LDIFAddChangeRecord((Entry) record);
1387      }
1388      else
1389      {
1390        final long firstLineNumber = result.getInput().getFirstLineNumber();
1391        throw new LDIFException(
1392             ERR_READ_NOT_CHANGE_RECORD.get(firstLineNumber), firstLineNumber,
1393             true);
1394      }
1395    }
1396
1397    throw new AssertionError("LDIFRecords must either be an Entry or an " +
1398                             "LDIFChangeRecord");
1399  }
1400
1401
1402
1403  /**
1404   * Reads the next LDIF record, which was read and parsed asynchronously by
1405   * separate threads.
1406   *
1407   * @return  The next LDIF record or {@code null} if there are no more records.
1408   *
1409   * @throws  IOException  If a problem occurs while attempting to read from the
1410   *                       LDIF source.
1411   *
1412   * @throws  LDIFException  If the data read could not be parsed as an entry.
1413   */
1414  private Result<UnparsedLDIFRecord, LDIFRecord> readLDIFRecordResultAsync()
1415          throws IOException, LDIFException
1416  {
1417    Result<UnparsedLDIFRecord, LDIFRecord> result = null;
1418
1419    // If the asynchronous reading and parsing is complete, then we don't have
1420    // to block waiting for the next record to show up on the queue.  If there
1421    // isn't a record there, then return null (EOF) right away.
1422    if (asyncParsingComplete.get())
1423    {
1424      result = asyncParsedRecords.poll();
1425    }
1426    else
1427    {
1428      try
1429      {
1430        // We probably could just do a asyncParsedRecords.take() here, but
1431        // there are some edge case error scenarios where
1432        // asyncParsingComplete might be set without a special EOF sentinel
1433        // Result enqueued.  So to guard against this, we have a very cautious
1434        // polling interval of 1 second.  During normal processing, we never
1435        // have to wait for this to expire, when there is something to do
1436        // (like shutdown).
1437        while ((result == null) && (!asyncParsingComplete.get()))
1438        {
1439          result = asyncParsedRecords.poll(1, TimeUnit.SECONDS);
1440        }
1441
1442        // There's a very small chance that we missed the value, so double-check
1443        if (result == null)
1444        {
1445          result = asyncParsedRecords.poll();
1446        }
1447      }
1448      catch (InterruptedException e)
1449      {
1450        debugException(e);
1451        throw createIOExceptionWithCause(null, e);
1452      }
1453    }
1454    if (result == null)
1455    {
1456      return null;
1457    }
1458
1459    rethrow(result.getFailureCause());
1460
1461    // Check if we reached the end of the input
1462    final UnparsedLDIFRecord unparsedRecord = result.getInput();
1463    if (unparsedRecord.isEOF())
1464    {
1465      // This might have been set already by the LineReaderThread, but
1466      // just in case it hasn't gotten to it yet, do so here.
1467      asyncParsingComplete.set(true);
1468
1469      // Enqueue this EOF result again for any other thread that might be
1470      // blocked in asyncParsedRecords.take() even though having multiple
1471      // threads call this method concurrently breaks the contract of this
1472      // class.
1473      try
1474      {
1475        asyncParsedRecords.put(result);
1476      }
1477      catch (InterruptedException e)
1478      {
1479        // We shouldn't ever get interrupted because the put won't ever block.
1480        // Once we are done reading, this is the only item left in the queue,
1481        // so we should always be able to re-enqueue it.
1482        debugException(e);
1483      }
1484      return null;
1485    }
1486
1487    return result;
1488  }
1489
1490
1491
1492  /**
1493   * Indicates whether this LDIF reader was constructed to perform asynchronous
1494   * processing.
1495   *
1496   * @return  {@code true} if this LDIFReader was constructed to perform
1497   *          asynchronous processing, or {@code false} if not.
1498   */
1499  private boolean isAsync()
1500  {
1501    return isAsync;
1502  }
1503
1504
1505
1506  /**
1507   * If not {@code null}, rethrows the specified Throwable as either an
1508   * IOException or LDIFException.
1509   *
1510   * @param t  The exception to rethrow.  If it's {@code null}, then nothing
1511   *           is thrown.
1512   *
1513   * @throws IOException   If t is an IOException or a checked Exception that
1514   *                       is not an LDIFException.
1515   * @throws LDIFException  If t is an LDIFException.
1516   */
1517  static void rethrow(final Throwable t)
1518         throws IOException, LDIFException
1519  {
1520    if (t == null)
1521    {
1522      return;
1523    }
1524
1525    if (t instanceof IOException)
1526    {
1527      throw (IOException) t;
1528    }
1529    else if (t instanceof LDIFException)
1530    {
1531      throw (LDIFException) t;
1532    }
1533    else if (t instanceof RuntimeException)
1534    {
1535      throw (RuntimeException) t;
1536    }
1537    else if (t instanceof Error)
1538    {
1539      throw (Error) t;
1540    }
1541    else
1542    {
1543      throw createIOExceptionWithCause(null, t);
1544    }
1545  }
1546
1547
1548
1549  /**
1550   * Reads a record from the LDIF source.  It may be either an entry or an LDIF
1551   * change record.
1552   *
1553   * @return The record read from the LDIF source, or {@code null} if there are
1554   *         no more entries to be read.
1555   *
1556   * @throws IOException   If a problem occurs while trying to read from the
1557   *                       LDIF source.
1558   * @throws LDIFException If the data read could not be parsed as an entry or
1559   *                       an LDIF change record.
1560   */
1561  private LDIFRecord readLDIFRecordInternal()
1562       throws IOException, LDIFException
1563  {
1564    final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1565    return decodeRecord(unparsedRecord, relativeBasePath, schema);
1566  }
1567
1568
1569
1570  /**
1571   * Reads an entry from the LDIF source.
1572   *
1573   * @return The entry read from the LDIF source, or {@code null} if there are
1574   *         no more entries to be read.
1575   *
1576   * @throws IOException   If a problem occurs while attempting to read from the
1577   *                       LDIF source.
1578   * @throws LDIFException If the data read could not be parsed as an entry.
1579   */
1580  private Entry readEntryInternal()
1581       throws IOException, LDIFException
1582  {
1583    Entry e = null;
1584    while (e == null)
1585    {
1586      final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1587      if (unparsedRecord.isEOF())
1588      {
1589        return null;
1590      }
1591
1592      e = decodeEntry(unparsedRecord, relativeBasePath);
1593      debugLDIFRead(e);
1594
1595      if (entryTranslator != null)
1596      {
1597        e = entryTranslator.translate(e, unparsedRecord.getFirstLineNumber());
1598      }
1599    }
1600    return e;
1601  }
1602
1603
1604
1605  /**
1606   * Reads an LDIF change record from the LDIF source.  Optionally, if the LDIF
1607   * record does not have a changetype, then it may be assumed to be an add
1608   * change record.
1609   *
1610   * @param defaultAdd Indicates whether an LDIF record not containing a
1611   *                   changetype should be retrieved as an add change record.
1612   *                   If this is {@code false} and the record read does not
1613   *                   include a changetype, then an {@link LDIFException} will
1614   *                   be thrown.
1615   *
1616   * @return The change record read from the LDIF source, or {@code null} if
1617   *         there are no more records to be read.
1618   *
1619   * @throws IOException   If a problem occurs while attempting to read from the
1620   *                       LDIF source.
1621   * @throws LDIFException If the data read could not be parsed as an LDIF
1622   *                       change record.
1623   */
1624  private LDIFChangeRecord readChangeRecordInternal(final boolean defaultAdd)
1625       throws IOException, LDIFException
1626  {
1627    LDIFChangeRecord r = null;
1628    while (r == null)
1629    {
1630      final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord();
1631      if (unparsedRecord.isEOF())
1632      {
1633        return null;
1634      }
1635
1636      r = decodeChangeRecord(unparsedRecord, relativeBasePath, defaultAdd,
1637           schema);
1638      debugLDIFRead(r);
1639
1640      if (changeRecordTranslator != null)
1641      {
1642        r = changeRecordTranslator.translate(r,
1643             unparsedRecord.getFirstLineNumber());
1644      }
1645    }
1646    return r;
1647  }
1648
1649
1650
1651  /**
1652   * Reads a record (either an entry or a change record) from the LDIF source
1653   * and places it in the line list.
1654   *
1655   * @return  The line number for the first line of the entry that was read.
1656   *
1657   * @throws  IOException  If a problem occurs while attempting to read from the
1658   *                       LDIF source.
1659   *
1660   * @throws  LDIFException  If the data read could not be parsed as a valid
1661   *                         LDIF record.
1662   */
1663  private UnparsedLDIFRecord readUnparsedRecord()
1664         throws IOException, LDIFException
1665  {
1666    final ArrayList<StringBuilder> lineList = new ArrayList<StringBuilder>(20);
1667    boolean lastWasComment = false;
1668    long firstLineNumber = lineNumberCounter + 1;
1669    while (true)
1670    {
1671      final String line = reader.readLine();
1672      lineNumberCounter++;
1673
1674      if (line == null)
1675      {
1676        // We've hit the end of the LDIF source.  If we haven't read any entry
1677        // data, then return null.  Otherwise, the last entry wasn't followed by
1678        // a blank line, which is OK, and we should decode that entry.
1679        if (lineList.isEmpty())
1680        {
1681          return new UnparsedLDIFRecord(new ArrayList<StringBuilder>(0),
1682               duplicateValueBehavior, trailingSpaceBehavior, schema, -1);
1683        }
1684        else
1685        {
1686          break;
1687        }
1688      }
1689
1690      if (line.length() == 0)
1691      {
1692        // It's a blank line.  If we have read entry data, then this signals the
1693        // end of the entry.  Otherwise, it's an extra space between entries,
1694        // which is OK.
1695        lastWasComment = false;
1696        if (lineList.isEmpty())
1697        {
1698          firstLineNumber++;
1699          continue;
1700        }
1701        else
1702        {
1703          break;
1704        }
1705      }
1706
1707      if (line.charAt(0) == ' ')
1708      {
1709        // The line starts with a space, which means that it must be a
1710        // continuation of the previous line.  This is true even if the last
1711        // line was a comment.
1712        if (lastWasComment)
1713        {
1714          // What we've read is part of a comment, so we don't care about its
1715          // content.
1716        }
1717        else if (lineList.isEmpty())
1718        {
1719          throw new LDIFException(
1720                         ERR_READ_UNEXPECTED_FIRST_SPACE.get(lineNumberCounter),
1721                         lineNumberCounter, false);
1722        }
1723        else
1724        {
1725          lineList.get(lineList.size() - 1).append(line.substring(1));
1726          lastWasComment = false;
1727        }
1728      }
1729      else if (line.charAt(0) == '#')
1730      {
1731        lastWasComment = true;
1732      }
1733      else
1734      {
1735        // We want to make sure that we skip over the "version:" line if it
1736        // exists, but that should only occur at the beginning of an entry where
1737        // it can't be confused with a possible "version" attribute.
1738        if (lineList.isEmpty() && line.startsWith("version:"))
1739        {
1740          lastWasComment = true;
1741        }
1742        else
1743        {
1744          lineList.add(new StringBuilder(line));
1745          lastWasComment = false;
1746        }
1747      }
1748    }
1749
1750    return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
1751         trailingSpaceBehavior, schema, firstLineNumber);
1752  }
1753
1754
1755
1756  /**
1757   * Decodes the provided set of LDIF lines as an entry.  The provided set of
1758   * lines must contain exactly one entry.  Long lines may be wrapped as per the
1759   * LDIF specification, and it is acceptable to have one or more blank lines
1760   * following the entry. A default trailing space behavior of
1761   * {@link TrailingSpaceBehavior#REJECT} will be used.
1762   *
1763   * @param  ldifLines  The set of lines that comprise the LDIF representation
1764   *                    of the entry.  It must not be {@code null} or empty.
1765   *
1766   * @return  The entry read from LDIF.
1767   *
1768   * @throws  LDIFException  If the provided LDIF data cannot be decoded as an
1769   *                         entry.
1770   */
1771  public static Entry decodeEntry(final String... ldifLines)
1772         throws LDIFException
1773  {
1774    final Entry e = decodeEntry(prepareRecord(DuplicateValueBehavior.STRIP,
1775         TrailingSpaceBehavior.REJECT, null, ldifLines),
1776         DEFAULT_RELATIVE_BASE_PATH);
1777    debugLDIFRead(e);
1778    return e;
1779  }
1780
1781
1782
1783  /**
1784   * Decodes the provided set of LDIF lines as an entry.  The provided set of
1785   * lines must contain exactly one entry.  Long lines may be wrapped as per the
1786   * LDIF specification, and it is acceptable to have one or more blank lines
1787   * following the entry. A default trailing space behavior of
1788   * {@link TrailingSpaceBehavior#REJECT} will be used.
1789   *
1790   * @param  ignoreDuplicateValues  Indicates whether to ignore duplicate
1791   *                                attribute values encountered while parsing.
1792   * @param  schema                 The schema to use when parsing the record,
1793   *                                if applicable.
1794   * @param  ldifLines              The set of lines that comprise the LDIF
1795   *                                representation of the entry.  It must not be
1796   *                                {@code null} or empty.
1797   *
1798   * @return  The entry read from LDIF.
1799   *
1800   * @throws  LDIFException  If the provided LDIF data cannot be decoded as an
1801   *                         entry.
1802   */
1803  public static Entry decodeEntry(final boolean ignoreDuplicateValues,
1804                                  final Schema schema,
1805                                  final String... ldifLines)
1806         throws LDIFException
1807  {
1808    return decodeEntry(ignoreDuplicateValues, TrailingSpaceBehavior.REJECT,
1809         schema, ldifLines);
1810  }
1811
1812
1813
1814  /**
1815   * Decodes the provided set of LDIF lines as an entry.  The provided set of
1816   * lines must contain exactly one entry.  Long lines may be wrapped as per the
1817   * LDIF specification, and it is acceptable to have one or more blank lines
1818   * following the entry.
1819   *
1820   * @param  ignoreDuplicateValues  Indicates whether to ignore duplicate
1821   *                                attribute values encountered while parsing.
1822   * @param  trailingSpaceBehavior  The behavior that should be exhibited when
1823   *                                encountering attribute values which are not
1824   *                                base64-encoded but contain trailing spaces.
1825   *                                It must not be {@code null}.
1826   * @param  schema                 The schema to use when parsing the record,
1827   *                                if applicable.
1828   * @param  ldifLines              The set of lines that comprise the LDIF
1829   *                                representation of the entry.  It must not be
1830   *                                {@code null} or empty.
1831   *
1832   * @return  The entry read from LDIF.
1833   *
1834   * @throws  LDIFException  If the provided LDIF data cannot be decoded as an
1835   *                         entry.
1836   */
1837  public static Entry decodeEntry(
1838         final boolean ignoreDuplicateValues,
1839         final TrailingSpaceBehavior trailingSpaceBehavior,
1840         final Schema schema,
1841         final String... ldifLines) throws LDIFException
1842  {
1843    final Entry e = decodeEntry(prepareRecord(
1844              (ignoreDuplicateValues
1845                   ? DuplicateValueBehavior.STRIP
1846                   : DuplicateValueBehavior.REJECT),
1847         trailingSpaceBehavior, schema, ldifLines),
1848         DEFAULT_RELATIVE_BASE_PATH);
1849    debugLDIFRead(e);
1850    return e;
1851  }
1852
1853
1854
1855  /**
1856   * Decodes the provided set of LDIF lines as an LDIF change record.  The
1857   * provided set of lines must contain exactly one change record and it must
1858   * include a changetype.  Long lines may be wrapped as per the LDIF
1859   * specification, and it is acceptable to have one or more blank lines
1860   * following the entry.
1861   *
1862   * @param  ldifLines  The set of lines that comprise the LDIF representation
1863   *                    of the change record.  It must not be {@code null} or
1864   *                    empty.
1865   *
1866   * @return  The change record read from LDIF.
1867   *
1868   * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
1869   *                         change record.
1870   */
1871  public static LDIFChangeRecord decodeChangeRecord(final String... ldifLines)
1872         throws LDIFException
1873  {
1874    return decodeChangeRecord(false, ldifLines);
1875  }
1876
1877
1878
1879  /**
1880   * Decodes the provided set of LDIF lines as an LDIF change record.  The
1881   * provided set of lines must contain exactly one change record.  Long lines
1882   * may be wrapped as per the LDIF specification, and it is acceptable to have
1883   * one or more blank lines following the entry.
1884   *
1885   * @param  defaultAdd  Indicates whether an LDIF record not containing a
1886   *                     changetype should be retrieved as an add change record.
1887   *                     If this is {@code false} and the record read does not
1888   *                     include a changetype, then an {@link LDIFException}
1889   *                     will be thrown.
1890   * @param  ldifLines  The set of lines that comprise the LDIF representation
1891   *                    of the change record.  It must not be {@code null} or
1892   *                    empty.
1893   *
1894   * @return  The change record read from LDIF.
1895   *
1896   * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
1897   *                         change record.
1898   */
1899  public static LDIFChangeRecord decodeChangeRecord(final boolean defaultAdd,
1900                                                    final String... ldifLines)
1901         throws LDIFException
1902  {
1903    final LDIFChangeRecord r =
1904         decodeChangeRecord(
1905              prepareRecord(DuplicateValueBehavior.STRIP,
1906                   TrailingSpaceBehavior.REJECT, null, ldifLines),
1907              DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null);
1908    debugLDIFRead(r);
1909    return r;
1910  }
1911
1912
1913
1914  /**
1915   * Decodes the provided set of LDIF lines as an LDIF change record.  The
1916   * provided set of lines must contain exactly one change record.  Long lines
1917   * may be wrapped as per the LDIF specification, and it is acceptable to have
1918   * one or more blank lines following the entry.
1919   *
1920   * @param  ignoreDuplicateValues  Indicates whether to ignore duplicate
1921   *                                attribute values encountered while parsing.
1922   * @param  schema                 The schema to use when processing the change
1923   *                                record, or {@code null} if no schema should
1924   *                                be used and all values should be treated as
1925   *                                case-insensitive strings.
1926   * @param  defaultAdd             Indicates whether an LDIF record not
1927   *                                containing a changetype should be retrieved
1928   *                                as an add change record.  If this is
1929   *                                {@code false} and the record read does not
1930   *                                include a changetype, then an
1931   *                                {@link LDIFException} will be thrown.
1932   * @param  ldifLines              The set of lines that comprise the LDIF
1933   *                                representation of the change record.  It
1934   *                                must not be {@code null} or empty.
1935   *
1936   * @return  The change record read from LDIF.
1937   *
1938   * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
1939   *                         change record.
1940   */
1941  public static LDIFChangeRecord decodeChangeRecord(
1942                                      final boolean ignoreDuplicateValues,
1943                                      final Schema schema,
1944                                      final boolean defaultAdd,
1945                                      final String... ldifLines)
1946         throws LDIFException
1947  {
1948    return decodeChangeRecord(ignoreDuplicateValues,
1949         TrailingSpaceBehavior.REJECT, schema, defaultAdd, ldifLines);
1950  }
1951
1952
1953
1954  /**
1955   * Decodes the provided set of LDIF lines as an LDIF change record.  The
1956   * provided set of lines must contain exactly one change record.  Long lines
1957   * may be wrapped as per the LDIF specification, and it is acceptable to have
1958   * one or more blank lines following the entry.
1959   *
1960   * @param  ignoreDuplicateValues  Indicates whether to ignore duplicate
1961   *                                attribute values encountered while parsing.
1962   * @param  trailingSpaceBehavior  The behavior that should be exhibited when
1963   *                                encountering attribute values which are not
1964   *                                base64-encoded but contain trailing spaces.
1965   *                                It must not be {@code null}.
1966   * @param  schema                 The schema to use when processing the change
1967   *                                record, or {@code null} if no schema should
1968   *                                be used and all values should be treated as
1969   *                                case-insensitive strings.
1970   * @param  defaultAdd             Indicates whether an LDIF record not
1971   *                                containing a changetype should be retrieved
1972   *                                as an add change record.  If this is
1973   *                                {@code false} and the record read does not
1974   *                                include a changetype, then an
1975   *                                {@link LDIFException} will be thrown.
1976   * @param  ldifLines              The set of lines that comprise the LDIF
1977   *                                representation of the change record.  It
1978   *                                must not be {@code null} or empty.
1979   *
1980   * @return  The change record read from LDIF.
1981   *
1982   * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
1983   *                         change record.
1984   */
1985  public static LDIFChangeRecord decodeChangeRecord(
1986                     final boolean ignoreDuplicateValues,
1987                     final TrailingSpaceBehavior trailingSpaceBehavior,
1988                     final Schema schema,
1989                     final boolean defaultAdd,
1990                     final String... ldifLines)
1991         throws LDIFException
1992  {
1993    final LDIFChangeRecord r = decodeChangeRecord(
1994         prepareRecord(
1995              (ignoreDuplicateValues
1996                   ? DuplicateValueBehavior.STRIP
1997                   : DuplicateValueBehavior.REJECT),
1998              trailingSpaceBehavior, schema, ldifLines),
1999         DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null);
2000    debugLDIFRead(r);
2001    return r;
2002  }
2003
2004
2005
2006  /**
2007   * Parses the provided set of lines into a list of {@code StringBuilder}
2008   * objects suitable for decoding into an entry or LDIF change record.
2009   * Comments will be ignored and wrapped lines will be unwrapped.
2010   *
2011   * @param  duplicateValueBehavior  The behavior that should be exhibited if
2012   *                                 the LDIF reader encounters an entry with
2013   *                                 duplicate values.
2014   * @param  trailingSpaceBehavior   The behavior that should be exhibited when
2015   *                                 encountering attribute values which are not
2016   *                                 base64-encoded but contain trailing spaces.
2017   * @param  schema                  The schema to use when parsing the record,
2018   *                                 if applicable.
2019   * @param  ldifLines               The set of lines that comprise the record
2020   *                                 to decode.  It must not be {@code null} or
2021   *                                 empty.
2022   *
2023   * @return  The prepared list of {@code StringBuilder} objects ready to be
2024   *          decoded.
2025   *
2026   * @throws  LDIFException  If the provided lines do not contain valid LDIF
2027   *                         content.
2028   */
2029  private static UnparsedLDIFRecord prepareRecord(
2030                      final DuplicateValueBehavior duplicateValueBehavior,
2031                      final TrailingSpaceBehavior trailingSpaceBehavior,
2032                      final Schema schema, final String... ldifLines)
2033          throws LDIFException
2034  {
2035    ensureNotNull(ldifLines);
2036    ensureFalse(ldifLines.length == 0,
2037                "LDIFReader.prepareRecord.ldifLines must not be empty.");
2038
2039    boolean lastWasComment = false;
2040    final ArrayList<StringBuilder> lineList =
2041         new ArrayList<StringBuilder>(ldifLines.length);
2042    for (int i=0; i < ldifLines.length; i++)
2043    {
2044      final String line = ldifLines[i];
2045      if (line.length() == 0)
2046      {
2047        // This is only acceptable if there are no more non-empty lines in the
2048        // array.
2049        for (int j=i+1; j < ldifLines.length; j++)
2050        {
2051          if (ldifLines[j].length() > 0)
2052          {
2053            throw new LDIFException(ERR_READ_UNEXPECTED_BLANK.get(i), i, true,
2054                                    ldifLines, null);
2055          }
2056
2057          // If we've gotten here, then we know that we're at the end of the
2058          // entry.  If we have read data, then we can decode it as an entry.
2059          // Otherwise, there was no real data in the provided LDIF lines.
2060          if (lineList.isEmpty())
2061          {
2062            throw new LDIFException(ERR_READ_ONLY_BLANKS.get(), 0, true,
2063                                    ldifLines, null);
2064          }
2065          else
2066          {
2067            return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
2068                 trailingSpaceBehavior, schema, 0);
2069          }
2070        }
2071      }
2072
2073      if (line.charAt(0) == ' ')
2074      {
2075        if (i > 0)
2076        {
2077          if (! lastWasComment)
2078          {
2079            lineList.get(lineList.size() - 1).append(line.substring(1));
2080          }
2081        }
2082        else
2083        {
2084          throw new LDIFException(
2085                         ERR_READ_UNEXPECTED_FIRST_SPACE_NO_NUMBER.get(), 0,
2086                         true, ldifLines, null);
2087        }
2088      }
2089      else if (line.charAt(0) == '#')
2090      {
2091        lastWasComment = true;
2092      }
2093      else
2094      {
2095        lineList.add(new StringBuilder(line));
2096        lastWasComment = false;
2097      }
2098    }
2099
2100    if (lineList.isEmpty())
2101    {
2102      throw new LDIFException(ERR_READ_NO_DATA.get(), 0, true, ldifLines, null);
2103    }
2104    else
2105    {
2106      return new UnparsedLDIFRecord(lineList, duplicateValueBehavior,
2107           trailingSpaceBehavior, schema, 0);
2108    }
2109  }
2110
2111
2112
2113  /**
2114   * Decodes the unparsed record that was read from the LDIF source.  It may be
2115   * either an entry or an LDIF change record.
2116   *
2117   * @param  unparsedRecord    The unparsed LDIF record that was read from the
2118   *                           input.  It must not be {@code null} or empty.
2119   * @param  relativeBasePath  The base path that will be prepended to relative
2120   *                           paths in order to obtain an absolute path.
2121   * @param  schema            The schema to use when parsing.
2122   *
2123   * @return  The parsed record, or {@code null} if there are no more entries to
2124   *          be read.
2125   *
2126   * @throws  LDIFException  If the data read could not be parsed as an entry or
2127   *                         an LDIF change record.
2128   */
2129  private static LDIFRecord decodeRecord(
2130                                 final UnparsedLDIFRecord unparsedRecord,
2131                                 final String relativeBasePath,
2132                                 final Schema schema)
2133       throws LDIFException
2134  {
2135    // If there was an error reading from the input, then we rethrow it here.
2136    final Exception readError = unparsedRecord.getFailureCause();
2137    if (readError != null)
2138    {
2139      if (readError instanceof LDIFException)
2140      {
2141        // If the error was an LDIFException, which will normally be the case,
2142        // then rethrow it with all of the same state.  We could just
2143        //   throw (LDIFException) readError;
2144        // but that's considered bad form.
2145        final LDIFException ldifEx = (LDIFException) readError;
2146        throw new LDIFException(ldifEx.getMessage(),
2147                                ldifEx.getLineNumber(),
2148                                ldifEx.mayContinueReading(),
2149                                ldifEx.getDataLines(),
2150                                ldifEx.getCause());
2151      }
2152      else
2153      {
2154        throw new LDIFException(getExceptionMessage(readError),
2155                                -1, true, readError);
2156      }
2157    }
2158
2159    if (unparsedRecord.isEOF())
2160    {
2161      return null;
2162    }
2163
2164    final ArrayList<StringBuilder> lineList = unparsedRecord.getLineList();
2165    if (unparsedRecord.getLineList() == null)
2166    {
2167      return null;  // We can get here if there was an error reading the lines.
2168    }
2169
2170    final LDIFRecord r;
2171    if (lineList.size() == 1)
2172    {
2173      r = decodeEntry(unparsedRecord, relativeBasePath);
2174    }
2175    else
2176    {
2177      final String lowerSecondLine = toLowerCase(lineList.get(1).toString());
2178      if (lowerSecondLine.startsWith("control:") ||
2179          lowerSecondLine.startsWith("changetype:"))
2180      {
2181        r = decodeChangeRecord(unparsedRecord, relativeBasePath, true, schema);
2182      }
2183      else
2184      {
2185        r = decodeEntry(unparsedRecord, relativeBasePath);
2186      }
2187    }
2188
2189    debugLDIFRead(r);
2190    return r;
2191  }
2192
2193
2194
2195  /**
2196   * Decodes the provided set of LDIF lines as an entry.  The provided list must
2197   * not contain any blank lines or comments, and lines are not allowed to be
2198   * wrapped.
2199   *
2200   * @param  unparsedRecord   The unparsed LDIF record that was read from the
2201   *                          input.  It must not be {@code null} or empty.
2202   * @param  relativeBasePath  The base path that will be prepended to relative
2203   *                           paths in order to obtain an absolute path.
2204   *
2205   * @return  The entry read from LDIF.
2206   *
2207   * @throws  LDIFException  If the provided LDIF data cannot be read as an
2208   *                         entry.
2209   */
2210  private static Entry decodeEntry(final UnparsedLDIFRecord unparsedRecord,
2211                                   final String relativeBasePath)
2212          throws LDIFException
2213  {
2214    final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList();
2215    final long firstLineNumber = unparsedRecord.getFirstLineNumber();
2216
2217    final Iterator<StringBuilder> iterator = ldifLines.iterator();
2218
2219    // The first line must start with either "version:" or "dn:".  If the first
2220    // line starts with "version:" then the second must start with "dn:".
2221    StringBuilder line = iterator.next();
2222    handleTrailingSpaces(line, null, firstLineNumber,
2223         unparsedRecord.getTrailingSpaceBehavior());
2224    int colonPos = line.indexOf(":");
2225    if ((colonPos > 0) &&
2226        line.substring(0, colonPos).equalsIgnoreCase("version"))
2227    {
2228      // The first line is "version:".  Under most conditions, this will be
2229      // handled by the LDIF reader, but this can happen if you call
2230      // decodeEntry with a set of data that includes a version.  At any rate,
2231      // read the next line, which must specify the DN.
2232      line = iterator.next();
2233      handleTrailingSpaces(line, null, firstLineNumber,
2234           unparsedRecord.getTrailingSpaceBehavior());
2235    }
2236
2237    colonPos = line.indexOf(":");
2238    if ((colonPos < 0) ||
2239         (! line.substring(0, colonPos).equalsIgnoreCase("dn")))
2240    {
2241      throw new LDIFException(
2242           ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber),
2243           firstLineNumber, true, ldifLines, null);
2244    }
2245
2246    final String dn;
2247    final int length = line.length();
2248    if (length == (colonPos+1))
2249    {
2250      // The colon was the last character on the line.  This is acceptable and
2251      // indicates that the entry has the null DN.
2252      dn = "";
2253    }
2254    else if (line.charAt(colonPos+1) == ':')
2255    {
2256      // Skip over any spaces leading up to the value, and then the rest of the
2257      // string is the base64-encoded DN.
2258      int pos = colonPos+2;
2259      while ((pos < length) && (line.charAt(pos) == ' '))
2260      {
2261        pos++;
2262      }
2263
2264      try
2265      {
2266        final byte[] dnBytes = Base64.decode(line.substring(pos));
2267        dn = new String(dnBytes, "UTF-8");
2268      }
2269      catch (final ParseException pe)
2270      {
2271        debugException(pe);
2272        throw new LDIFException(
2273                       ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2274                                                            pe.getMessage()),
2275                       firstLineNumber, true, ldifLines, pe);
2276      }
2277      catch (final Exception e)
2278      {
2279        debugException(e);
2280        throw new LDIFException(
2281                       ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, e),
2282                       firstLineNumber, true, ldifLines, e);
2283      }
2284    }
2285    else
2286    {
2287      // Skip over any spaces leading up to the value, and then the rest of the
2288      // string is the DN.
2289      int pos = colonPos+1;
2290      while ((pos < length) && (line.charAt(pos) == ' '))
2291      {
2292        pos++;
2293      }
2294
2295      dn = line.substring(pos);
2296    }
2297
2298
2299    // The remaining lines must be the attributes for the entry.  However, we
2300    // will allow the case in which an entry does not have any attributes, to be
2301    // able to support reading search result entries in which no attributes were
2302    // returned.
2303    if (! iterator.hasNext())
2304    {
2305      return new Entry(dn, unparsedRecord.getSchema());
2306    }
2307
2308    return new Entry(dn, unparsedRecord.getSchema(),
2309         parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(),
2310              unparsedRecord.getTrailingSpaceBehavior(),
2311              unparsedRecord.getSchema(), ldifLines, iterator, relativeBasePath,
2312              firstLineNumber));
2313  }
2314
2315
2316
2317  /**
2318   * Decodes the provided set of LDIF lines as a change record.  The provided
2319   * list must not contain any blank lines or comments, and lines are not
2320   * allowed to be wrapped.
2321   *
2322   * @param  unparsedRecord    The unparsed LDIF record that was read from the
2323   *                           input.  It must not be {@code null} or empty.
2324   * @param  relativeBasePath  The base path that will be prepended to relative
2325   *                           paths in order to obtain an absolute path.
2326   * @param  defaultAdd        Indicates whether an LDIF record not containing a
2327   *                           changetype should be retrieved as an add change
2328   *                           record.  If this is {@code false} and the record
2329   *                           read does not include a changetype, then an
2330   *                           {@link LDIFException} will be thrown.
2331   * @param  schema            The schema to use in parsing.
2332   *
2333   * @return  The change record read from LDIF.
2334   *
2335   * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
2336   *                         change record.
2337   */
2338  private static LDIFChangeRecord decodeChangeRecord(
2339                                       final UnparsedLDIFRecord unparsedRecord,
2340                                       final String relativeBasePath,
2341                                       final boolean defaultAdd,
2342                                       final Schema schema)
2343          throws LDIFException
2344  {
2345    final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList();
2346    final long firstLineNumber = unparsedRecord.getFirstLineNumber();
2347
2348    Iterator<StringBuilder> iterator = ldifLines.iterator();
2349
2350    // The first line must start with either "version:" or "dn:".  If the first
2351    // line starts with "version:" then the second must start with "dn:".
2352    StringBuilder line = iterator.next();
2353    handleTrailingSpaces(line, null, firstLineNumber,
2354         unparsedRecord.getTrailingSpaceBehavior());
2355    int colonPos = line.indexOf(":");
2356    int linesRead = 1;
2357    if ((colonPos > 0) &&
2358        line.substring(0, colonPos).equalsIgnoreCase("version"))
2359    {
2360      // The first line is "version:".  Under most conditions, this will be
2361      // handled by the LDIF reader, but this can happen if you call
2362      // decodeEntry with a set of data that includes a version.  At any rate,
2363      // read the next line, which must specify the DN.
2364      line = iterator.next();
2365      linesRead++;
2366      handleTrailingSpaces(line, null, firstLineNumber,
2367           unparsedRecord.getTrailingSpaceBehavior());
2368    }
2369
2370    colonPos = line.indexOf(":");
2371    if ((colonPos < 0) ||
2372         (! line.substring(0, colonPos).equalsIgnoreCase("dn")))
2373    {
2374      throw new LDIFException(
2375           ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber),
2376           firstLineNumber, true, ldifLines, null);
2377    }
2378
2379    final String dn;
2380    int length = line.length();
2381    if (length == (colonPos+1))
2382    {
2383      // The colon was the last character on the line.  This is acceptable and
2384      // indicates that the entry has the null DN.
2385      dn = "";
2386    }
2387    else if (line.charAt(colonPos+1) == ':')
2388    {
2389      // Skip over any spaces leading up to the value, and then the rest of the
2390      // string is the base64-encoded DN.
2391      int pos = colonPos+2;
2392      while ((pos < length) && (line.charAt(pos) == ' '))
2393      {
2394        pos++;
2395      }
2396
2397      try
2398      {
2399        final byte[] dnBytes = Base64.decode(line.substring(pos));
2400        dn = new String(dnBytes, "UTF-8");
2401      }
2402      catch (final ParseException pe)
2403      {
2404        debugException(pe);
2405        throw new LDIFException(
2406                       ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2407                                                               pe.getMessage()),
2408                       firstLineNumber, true, ldifLines, pe);
2409      }
2410      catch (final Exception e)
2411      {
2412        debugException(e);
2413        throw new LDIFException(
2414                       ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber,
2415                                                               e),
2416                       firstLineNumber, true, ldifLines, e);
2417      }
2418    }
2419    else
2420    {
2421      // Skip over any spaces leading up to the value, and then the rest of the
2422      // string is the DN.
2423      int pos = colonPos+1;
2424      while ((pos < length) && (line.charAt(pos) == ' '))
2425      {
2426        pos++;
2427      }
2428
2429      dn = line.substring(pos);
2430    }
2431
2432
2433    // An LDIF change record may contain zero or more controls, with the end of
2434    // the controls signified by the changetype.  The changetype element must be
2435    // present, unless defaultAdd is true in which case the first thing that is
2436    // neither control or changetype will trigger the start of add attribute
2437    // parsing.
2438    if (! iterator.hasNext())
2439    {
2440      throw new LDIFException(ERR_READ_CR_TOO_SHORT.get(firstLineNumber),
2441                              firstLineNumber, true, ldifLines, null);
2442    }
2443
2444    String changeType = null;
2445    ArrayList<Control> controls = null;
2446    while (true)
2447    {
2448      line = iterator.next();
2449      handleTrailingSpaces(line, dn, firstLineNumber,
2450           unparsedRecord.getTrailingSpaceBehavior());
2451      colonPos = line.indexOf(":");
2452      if (colonPos < 0)
2453      {
2454        throw new LDIFException(
2455             ERR_READ_CR_SECOND_LINE_MISSING_COLON.get(firstLineNumber),
2456             firstLineNumber, true, ldifLines, null);
2457      }
2458
2459      final String token = toLowerCase(line.substring(0, colonPos));
2460      if (token.equals("control"))
2461      {
2462        if (controls == null)
2463        {
2464          controls = new ArrayList<Control>(5);
2465        }
2466
2467        controls.add(decodeControl(line, colonPos, firstLineNumber, ldifLines,
2468             relativeBasePath));
2469      }
2470      else if (token.equals("changetype"))
2471      {
2472        changeType =
2473             decodeChangeType(line, colonPos, firstLineNumber, ldifLines);
2474        break;
2475      }
2476      else if (defaultAdd)
2477      {
2478        // The line we read wasn't a control or changetype declaration, so we'll
2479        // assume it's an attribute in an add record.  However, we're not ready
2480        // for that yet, and since we can't rewind an iterator we'll create a
2481        // new one that hasn't yet gotten to this line.
2482        changeType = "add";
2483        iterator = ldifLines.iterator();
2484        for (int i=0; i < linesRead; i++)
2485        {
2486          iterator.next();
2487        }
2488        break;
2489      }
2490      else
2491      {
2492        throw new LDIFException(
2493             ERR_READ_CR_CT_LINE_DOESNT_START_WITH_CONTROL_OR_CT.get(
2494                  firstLineNumber),
2495             firstLineNumber, true, ldifLines, null);
2496      }
2497
2498      linesRead++;
2499    }
2500
2501
2502    // Make sure that the change type is acceptable and then decode the rest of
2503    // the change record accordingly.
2504    final String lowerChangeType = toLowerCase(changeType);
2505    if (lowerChangeType.equals("add"))
2506    {
2507      // There must be at least one more line.  If not, then that's an error.
2508      // Otherwise, parse the rest of the data as attribute-value pairs.
2509      if (iterator.hasNext())
2510      {
2511        final Collection<Attribute> attrs =
2512             parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(),
2513                  unparsedRecord.getTrailingSpaceBehavior(),
2514                  unparsedRecord.getSchema(), ldifLines, iterator,
2515                  relativeBasePath, firstLineNumber);
2516        final Attribute[] attributes = new Attribute[attrs.size()];
2517        final Iterator<Attribute> attrIterator = attrs.iterator();
2518        for (int i=0; i < attributes.length; i++)
2519        {
2520          attributes[i] = attrIterator.next();
2521        }
2522
2523        return new LDIFAddChangeRecord(dn, attributes, controls);
2524      }
2525      else
2526      {
2527        throw new LDIFException(ERR_READ_CR_NO_ATTRIBUTES.get(firstLineNumber),
2528                                firstLineNumber, true, ldifLines, null);
2529      }
2530    }
2531    else if (lowerChangeType.equals("delete"))
2532    {
2533      // There shouldn't be any more data.  If there is, then that's an error.
2534      // Otherwise, we can just return the delete change record with what we
2535      // already know.
2536      if (iterator.hasNext())
2537      {
2538        throw new LDIFException(
2539                       ERR_READ_CR_EXTRA_DELETE_DATA.get(firstLineNumber),
2540                       firstLineNumber, true, ldifLines, null);
2541      }
2542      else
2543      {
2544        return new LDIFDeleteChangeRecord(dn, controls);
2545      }
2546    }
2547    else if (lowerChangeType.equals("modify"))
2548    {
2549      // There must be at least one more line.  If not, then that's an error.
2550      // Otherwise, parse the rest of the data as a set of modifications.
2551      if (iterator.hasNext())
2552      {
2553        final Modification[] mods = parseModifications(dn,
2554             unparsedRecord.getTrailingSpaceBehavior(), ldifLines, iterator,
2555             firstLineNumber, schema);
2556        return new LDIFModifyChangeRecord(dn, mods, controls);
2557      }
2558      else
2559      {
2560        throw new LDIFException(ERR_READ_CR_NO_MODS.get(firstLineNumber),
2561                                firstLineNumber, true, ldifLines, null);
2562      }
2563    }
2564    else if (lowerChangeType.equals("moddn") ||
2565             lowerChangeType.equals("modrdn"))
2566    {
2567      // There must be at least one more line.  If not, then that's an error.
2568      // Otherwise, parse the rest of the data as a set of modifications.
2569      if (iterator.hasNext())
2570      {
2571        return parseModifyDNChangeRecord(ldifLines, iterator, dn, controls,
2572             unparsedRecord.getTrailingSpaceBehavior(), firstLineNumber);
2573      }
2574      else
2575      {
2576        throw new LDIFException(ERR_READ_CR_NO_NEWRDN.get(firstLineNumber),
2577                                firstLineNumber, true, ldifLines, null);
2578      }
2579    }
2580    else
2581    {
2582      throw new LDIFException(ERR_READ_CR_INVALID_CT.get(changeType,
2583                                                         firstLineNumber),
2584                              firstLineNumber, true, ldifLines, null);
2585    }
2586  }
2587
2588
2589
2590  /**
2591   * Decodes information about a control from the provided line.
2592   *
2593   * @param  line              The line to process.
2594   * @param  colonPos          The position of the colon that separates the
2595   *                           control token string from tbe encoded control.
2596   * @param  firstLineNumber   The line number for the start of the record.
2597   * @param  ldifLines         The lines that comprise the LDIF representation
2598   *                           of the full record being parsed.
2599   * @param  relativeBasePath  The base path that will be prepended to relative
2600   *                           paths in order to obtain an absolute path.
2601   *
2602   * @return  The decoded control.
2603   *
2604   * @throws  LDIFException  If a problem is encountered while trying to decode
2605   *                         the changetype.
2606   */
2607  private static Control decodeControl(final StringBuilder line,
2608                                       final int colonPos,
2609                                       final long firstLineNumber,
2610                                       final ArrayList<StringBuilder> ldifLines,
2611                                       final String relativeBasePath)
2612          throws LDIFException
2613  {
2614    final String controlString;
2615    int length = line.length();
2616    if (length == (colonPos+1))
2617    {
2618      // The colon was the last character on the line.  This is not
2619      // acceptable.
2620      throw new LDIFException(
2621           ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber),
2622           firstLineNumber, true, ldifLines, null);
2623    }
2624    else if (line.charAt(colonPos+1) == ':')
2625    {
2626      // Skip over any spaces leading up to the value, and then the rest of
2627      // the string is the base64-encoded control representation.  This is
2628      // unusual and unnecessary, but is nevertheless acceptable.
2629      int pos = colonPos+2;
2630      while ((pos < length) && (line.charAt(pos) == ' '))
2631      {
2632        pos++;
2633      }
2634
2635      try
2636      {
2637        final byte[] controlBytes = Base64.decode(line.substring(pos));
2638        controlString =  new String(controlBytes, "UTF-8");
2639      }
2640      catch (final ParseException pe)
2641      {
2642        debugException(pe);
2643        throw new LDIFException(
2644                       ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get(
2645                            firstLineNumber, pe.getMessage()),
2646                       firstLineNumber, true, ldifLines, pe);
2647      }
2648      catch (final Exception e)
2649      {
2650        debugException(e);
2651        throw new LDIFException(
2652             ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get(firstLineNumber, e),
2653             firstLineNumber, true, ldifLines, e);
2654      }
2655    }
2656    else
2657    {
2658      // Skip over any spaces leading up to the value, and then the rest of
2659      // the string is the encoded control.
2660      int pos = colonPos+1;
2661      while ((pos < length) && (line.charAt(pos) == ' '))
2662      {
2663        pos++;
2664      }
2665
2666      controlString = line.substring(pos);
2667    }
2668
2669    // If the resulting control definition is empty, then that's invalid.
2670    if (controlString.length() == 0)
2671    {
2672      throw new LDIFException(
2673           ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber),
2674           firstLineNumber, true, ldifLines, null);
2675    }
2676
2677
2678    // The first element of the control must be the OID, and it must be followed
2679    // by a space (to separate it from the criticality), a colon (to separate it
2680    // from the value and indicate a default criticality of false), or the end
2681    // of the line (to indicate a default criticality of false and no value).
2682    String oid = null;
2683    boolean hasCriticality = false;
2684    boolean hasValue = false;
2685    int pos = 0;
2686    length = controlString.length();
2687    while (pos < length)
2688    {
2689      final char c = controlString.charAt(pos);
2690      if (c == ':')
2691      {
2692        // This indicates that there is no criticality and that the value
2693        // immediately follows the OID.
2694        oid = controlString.substring(0, pos++);
2695        hasValue = true;
2696        break;
2697      }
2698      else if (c == ' ')
2699      {
2700        // This indicates that there is a criticality.  We don't know anything
2701        // about the presence of a value yet.
2702        oid = controlString.substring(0, pos++);
2703        hasCriticality = true;
2704        break;
2705      }
2706      else
2707      {
2708        pos++;
2709      }
2710    }
2711
2712    if (oid == null)
2713    {
2714      // This indicates that the string representation of the control is only
2715      // the OID.
2716      return new Control(controlString, false);
2717    }
2718
2719
2720    // See if we need to read the criticality.  If so, then do so now.
2721    // Otherwise, assume a default criticality of false.
2722    final boolean isCritical;
2723    if (hasCriticality)
2724    {
2725      // Skip over any spaces before the criticality.
2726      while (controlString.charAt(pos) == ' ')
2727      {
2728        pos++;
2729      }
2730
2731      // Read until we find a colon or the end of the string.
2732      final int criticalityStartPos = pos;
2733      while (pos < length)
2734      {
2735        final char c = controlString.charAt(pos);
2736        if (c == ':')
2737        {
2738          hasValue = true;
2739          break;
2740        }
2741        else
2742        {
2743          pos++;
2744        }
2745      }
2746
2747      final String criticalityString =
2748           toLowerCase(controlString.substring(criticalityStartPos, pos));
2749      if (criticalityString.equals("true"))
2750      {
2751        isCritical = true;
2752      }
2753      else if (criticalityString.equals("false"))
2754      {
2755        isCritical = false;
2756      }
2757      else
2758      {
2759        throw new LDIFException(
2760             ERR_READ_CONTROL_LINE_INVALID_CRITICALITY.get(criticalityString,
2761                  firstLineNumber),
2762             firstLineNumber, true, ldifLines, null);
2763      }
2764
2765      if (hasValue)
2766      {
2767        pos++;
2768      }
2769    }
2770    else
2771    {
2772      isCritical = false;
2773    }
2774
2775    // See if we need to read the value.  If so, then do so now.  It may be
2776    // a string, or it may be base64-encoded.  It could conceivably even be read
2777    // from a URL.
2778    final ASN1OctetString value;
2779    if (hasValue)
2780    {
2781      // The character immediately after the colon that precedes the value may
2782      // be one of the following:
2783      // - A second colon (optionally followed by a single space) to indicate
2784      //   that the value is base64-encoded.
2785      // - A less-than symbol to indicate that the value should be read from a
2786      //   location specified by a URL.
2787      // - A single space that precedes the non-base64-encoded value.
2788      // - The first character of the non-base64-encoded value.
2789      switch (controlString.charAt(pos))
2790      {
2791        case ':':
2792          try
2793          {
2794            if (controlString.length() == (pos+1))
2795            {
2796              value = new ASN1OctetString();
2797            }
2798            else if (controlString.charAt(pos+1) == ' ')
2799            {
2800              value = new ASN1OctetString(
2801                   Base64.decode(controlString.substring(pos+2)));
2802            }
2803            else
2804            {
2805              value = new ASN1OctetString(
2806                   Base64.decode(controlString.substring(pos+1)));
2807            }
2808          }
2809          catch (final Exception e)
2810          {
2811            debugException(e);
2812            throw new LDIFException(
2813                 ERR_READ_CONTROL_LINE_CANNOT_BASE64_DECODE_VALUE.get(
2814                      firstLineNumber, getExceptionMessage(e)),
2815                 firstLineNumber, true, ldifLines, e);
2816          }
2817          break;
2818        case '<':
2819          try
2820          {
2821            final String urlString;
2822            if (controlString.charAt(pos+1) == ' ')
2823            {
2824              urlString = controlString.substring(pos+2);
2825            }
2826            else
2827            {
2828              urlString = controlString.substring(pos+1);
2829            }
2830            value = new ASN1OctetString(retrieveURLBytes(urlString,
2831                 relativeBasePath, firstLineNumber));
2832          }
2833          catch (final Exception e)
2834          {
2835            debugException(e);
2836            throw new LDIFException(
2837                 ERR_READ_CONTROL_LINE_CANNOT_RETRIEVE_VALUE_FROM_URL.get(
2838                      firstLineNumber, getExceptionMessage(e)),
2839                 firstLineNumber, true, ldifLines, e);
2840          }
2841          break;
2842        case ' ':
2843          value = new ASN1OctetString(controlString.substring(pos+1));
2844          break;
2845        default:
2846          value = new ASN1OctetString(controlString.substring(pos));
2847          break;
2848      }
2849    }
2850    else
2851    {
2852      value = null;
2853    }
2854
2855    return new Control(oid, isCritical, value);
2856  }
2857
2858
2859
2860  /**
2861   * Decodes the changetype element from the provided line.
2862   *
2863   * @param  line             The line to process.
2864   * @param  colonPos         The position of the colon that separates the
2865   *                          changetype string from its value.
2866   * @param  firstLineNumber  The line number for the start of the record.
2867   * @param  ldifLines        The lines that comprise the LDIF representation of
2868   *                          the full record being parsed.
2869   *
2870   * @return  The decoded changetype string.
2871   *
2872   * @throws  LDIFException  If a problem is encountered while trying to decode
2873   *                         the changetype.
2874   */
2875  private static String decodeChangeType(final StringBuilder line,
2876                             final int colonPos, final long firstLineNumber,
2877                             final ArrayList<StringBuilder> ldifLines)
2878          throws LDIFException
2879  {
2880    final int length = line.length();
2881    if (length == (colonPos+1))
2882    {
2883      // The colon was the last character on the line.  This is not
2884      // acceptable.
2885      throw new LDIFException(
2886           ERR_READ_CT_LINE_NO_CT_VALUE.get(firstLineNumber), firstLineNumber,
2887           true, ldifLines, null);
2888    }
2889    else if (line.charAt(colonPos+1) == ':')
2890    {
2891      // Skip over any spaces leading up to the value, and then the rest of
2892      // the string is the base64-encoded changetype.  This is unusual and
2893      // unnecessary, but is nevertheless acceptable.
2894      int pos = colonPos+2;
2895      while ((pos < length) && (line.charAt(pos) == ' '))
2896      {
2897        pos++;
2898      }
2899
2900      try
2901      {
2902        final byte[] changeTypeBytes = Base64.decode(line.substring(pos));
2903        return new String(changeTypeBytes, "UTF-8");
2904      }
2905      catch (final ParseException pe)
2906      {
2907        debugException(pe);
2908        throw new LDIFException(
2909                       ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber,
2910                                                            pe.getMessage()),
2911                       firstLineNumber, true, ldifLines, pe);
2912      }
2913      catch (final Exception e)
2914      {
2915        debugException(e);
2916        throw new LDIFException(
2917             ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, e),
2918             firstLineNumber, true, ldifLines, e);
2919      }
2920    }
2921    else
2922    {
2923      // Skip over any spaces leading up to the value, and then the rest of
2924      // the string is the changetype.
2925      int pos = colonPos+1;
2926      while ((pos < length) && (line.charAt(pos) == ' '))
2927      {
2928        pos++;
2929      }
2930
2931      return line.substring(pos);
2932    }
2933  }
2934
2935
2936
2937  /**
2938   * Parses the data available through the provided iterator as a collection of
2939   * attributes suitable for use in an entry or an add change record.
2940   *
2941   * @param  dn                      The DN of the record being read.
2942   * @param  duplicateValueBehavior  The behavior that should be exhibited if
2943   *                                 the LDIF reader encounters an entry with
2944   *                                 duplicate values.
2945   * @param  trailingSpaceBehavior   The behavior that should be exhibited when
2946   *                                 encountering attribute values which are not
2947   *                                 base64-encoded but contain trailing spaces.
2948   * @param  schema                  The schema to use when parsing the
2949   *                                 attributes, or {@code null} if none is
2950   *                                 needed.
2951   * @param  ldifLines               The lines that comprise the LDIF
2952   *                                 representation of the full record being
2953   *                                 parsed.
2954   * @param  iterator                The iterator to use to access the attribute
2955   *                                 lines.
2956   * @param  relativeBasePath        The base path that will be prepended to
2957   *                                 relative paths in order to obtain an
2958   *                                 absolute path.
2959   * @param  firstLineNumber         The line number for the start of the
2960   *                                 record.
2961   *
2962   * @return  The collection of attributes that were read.
2963   *
2964   * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
2965   *                         set of attributes.
2966   */
2967  private static ArrayList<Attribute> parseAttributes(final String dn,
2968       final DuplicateValueBehavior duplicateValueBehavior,
2969       final TrailingSpaceBehavior trailingSpaceBehavior, final Schema schema,
2970       final ArrayList<StringBuilder> ldifLines,
2971       final Iterator<StringBuilder> iterator, final String relativeBasePath,
2972       final long firstLineNumber)
2973          throws LDIFException
2974  {
2975    final LinkedHashMap<String,Object> attributes =
2976         new LinkedHashMap<String,Object>(ldifLines.size());
2977    while (iterator.hasNext())
2978    {
2979      final StringBuilder line = iterator.next();
2980      handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
2981      final int colonPos = line.indexOf(":");
2982      if (colonPos <= 0)
2983      {
2984        throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber),
2985                                firstLineNumber, true, ldifLines, null);
2986      }
2987
2988      final String attributeName = line.substring(0, colonPos);
2989      final String lowerName     = toLowerCase(attributeName);
2990
2991      final MatchingRule matchingRule;
2992      if (schema == null)
2993      {
2994        matchingRule = CaseIgnoreStringMatchingRule.getInstance();
2995      }
2996      else
2997      {
2998        matchingRule =
2999             MatchingRule.selectEqualityMatchingRule(attributeName, schema);
3000      }
3001
3002      Attribute attr;
3003      final LDIFAttribute ldifAttr;
3004      final Object attrObject = attributes.get(lowerName);
3005      if (attrObject == null)
3006      {
3007        attr     = null;
3008        ldifAttr = null;
3009      }
3010      else
3011      {
3012        if (attrObject instanceof Attribute)
3013        {
3014          attr     = (Attribute) attrObject;
3015          ldifAttr = new LDIFAttribute(attr.getName(), matchingRule,
3016                                       attr.getRawValues()[0]);
3017          attributes.put(lowerName, ldifAttr);
3018        }
3019        else
3020        {
3021          attr     = null;
3022          ldifAttr = (LDIFAttribute) attrObject;
3023        }
3024      }
3025
3026      final int length = line.length();
3027      if (length == (colonPos+1))
3028      {
3029        // This means that the attribute has a zero-length value, which is
3030        // acceptable.
3031        if (attrObject == null)
3032        {
3033          attr = new Attribute(attributeName, matchingRule, "");
3034          attributes.put(lowerName, attr);
3035        }
3036        else
3037        {
3038          try
3039          {
3040            if (! ldifAttr.addValue(new ASN1OctetString(),
3041                       duplicateValueBehavior))
3042            {
3043              if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3044              {
3045                throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3046                     firstLineNumber, attributeName), firstLineNumber, true,
3047                     ldifLines, null);
3048              }
3049            }
3050          }
3051          catch (LDAPException le)
3052          {
3053            throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
3054                 firstLineNumber, attributeName, getExceptionMessage(le)),
3055                 firstLineNumber, true, ldifLines, le);
3056          }
3057        }
3058      }
3059      else if (line.charAt(colonPos+1) == ':')
3060      {
3061        // Skip over any spaces leading up to the value, and then the rest of
3062        // the string is the base64-encoded attribute value.
3063        int pos = colonPos+2;
3064        while ((pos < length) && (line.charAt(pos) == ' '))
3065        {
3066          pos++;
3067        }
3068
3069        try
3070        {
3071          final byte[] valueBytes = Base64.decode(line.substring(pos));
3072          if (attrObject == null)
3073          {
3074            attr = new Attribute(attributeName, matchingRule, valueBytes);
3075            attributes.put(lowerName, attr);
3076          }
3077          else
3078          {
3079            try
3080            {
3081              if (! ldifAttr.addValue(new ASN1OctetString(valueBytes),
3082                         duplicateValueBehavior))
3083              {
3084                if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3085                {
3086                  throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3087                       firstLineNumber, attributeName), firstLineNumber, true,
3088                       ldifLines, null);
3089                }
3090              }
3091            }
3092            catch (LDAPException le)
3093            {
3094              throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
3095                   firstLineNumber, attributeName, getExceptionMessage(le)),
3096                   firstLineNumber, true, ldifLines, le);
3097            }
3098          }
3099        }
3100        catch (final ParseException pe)
3101        {
3102          debugException(pe);
3103          throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
3104                                       attributeName,  firstLineNumber,
3105                                       pe.getMessage()),
3106                                  firstLineNumber, true, ldifLines, pe);
3107        }
3108      }
3109      else if (line.charAt(colonPos+1) == '<')
3110      {
3111        // Skip over any spaces leading up to the value, and then the rest of
3112        // the string is a URL that indicates where to get the real content.
3113        // At the present time, we'll only support the file URLs.
3114        int pos = colonPos+2;
3115        while ((pos < length) && (line.charAt(pos) == ' '))
3116        {
3117          pos++;
3118        }
3119
3120        final byte[] urlBytes;
3121        final String urlString = line.substring(pos);
3122        try
3123        {
3124          urlBytes =
3125               retrieveURLBytes(urlString, relativeBasePath, firstLineNumber);
3126        }
3127        catch (final Exception e)
3128        {
3129          debugException(e);
3130          throw new LDIFException(
3131               ERR_READ_URL_EXCEPTION.get(attributeName, urlString,
3132                    firstLineNumber, e),
3133               firstLineNumber, true, ldifLines, e);
3134        }
3135
3136        if (attrObject == null)
3137        {
3138          attr = new Attribute(attributeName, matchingRule, urlBytes);
3139          attributes.put(lowerName, attr);
3140        }
3141        else
3142        {
3143          try
3144          {
3145            if (! ldifAttr.addValue(new ASN1OctetString(urlBytes),
3146                 duplicateValueBehavior))
3147            {
3148              if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3149              {
3150                throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3151                     firstLineNumber, attributeName), firstLineNumber, true,
3152                     ldifLines, null);
3153              }
3154            }
3155          }
3156          catch (final LDIFException le)
3157          {
3158            debugException(le);
3159            throw le;
3160          }
3161          catch (final Exception e)
3162          {
3163            debugException(e);
3164            throw new LDIFException(
3165                 ERR_READ_URL_EXCEPTION.get(attributeName, urlString,
3166                      firstLineNumber, e),
3167                 firstLineNumber, true, ldifLines, e);
3168          }
3169        }
3170      }
3171      else
3172      {
3173        // Skip over any spaces leading up to the value, and then the rest of
3174        // the string is the value.
3175        int pos = colonPos+1;
3176        while ((pos < length) && (line.charAt(pos) == ' '))
3177        {
3178          pos++;
3179        }
3180
3181        final String valueString = line.substring(pos);
3182        if (attrObject == null)
3183        {
3184          attr = new Attribute(attributeName, matchingRule, valueString);
3185          attributes.put(lowerName, attr);
3186        }
3187        else
3188        {
3189          try
3190          {
3191            if (! ldifAttr.addValue(new ASN1OctetString(valueString),
3192                       duplicateValueBehavior))
3193            {
3194              if (duplicateValueBehavior != DuplicateValueBehavior.STRIP)
3195              {
3196                throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn,
3197                     firstLineNumber, attributeName), firstLineNumber, true,
3198                     ldifLines, null);
3199              }
3200            }
3201          }
3202          catch (LDAPException le)
3203          {
3204            throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn,
3205                 firstLineNumber, attributeName, getExceptionMessage(le)),
3206                 firstLineNumber, true, ldifLines, le);
3207          }
3208        }
3209      }
3210    }
3211
3212    final ArrayList<Attribute> attrList =
3213         new ArrayList<Attribute>(attributes.size());
3214    for (final Object o : attributes.values())
3215    {
3216      if (o instanceof Attribute)
3217      {
3218        attrList.add((Attribute) o);
3219      }
3220      else
3221      {
3222        attrList.add(((LDIFAttribute) o).toAttribute());
3223      }
3224    }
3225
3226    return attrList;
3227  }
3228
3229
3230
3231  /**
3232   * Retrieves the bytes that make up the file referenced by the given URL.
3233   *
3234   * @param  urlString         The string representation of the URL to retrieve.
3235   * @param  relativeBasePath  The base path that will be prepended to relative
3236   *                           paths in order to obtain an absolute path.
3237   * @param  firstLineNumber   The line number for the start of the record.
3238   *
3239   * @return  The bytes contained in the specified file, or an empty array if
3240   *          the specified file is empty.
3241   *
3242   * @throws  LDIFException  If the provided URL is malformed or references a
3243   *                         nonexistent file.
3244   *
3245   * @throws  IOException  If a problem is encountered while attempting to read
3246   *                       from the target file.
3247   */
3248  private static byte[] retrieveURLBytes(final String urlString,
3249                                         final String relativeBasePath,
3250                                         final long firstLineNumber)
3251          throws LDIFException, IOException
3252  {
3253    int pos;
3254    String path;
3255    final String lowerURLString = toLowerCase(urlString);
3256    if (lowerURLString.startsWith("file:/"))
3257    {
3258      pos = 6;
3259      while ((pos < urlString.length()) && (urlString.charAt(pos) == '/'))
3260      {
3261        pos++;
3262      }
3263
3264      path = urlString.substring(pos-1);
3265    }
3266    else if (lowerURLString.startsWith("file:"))
3267    {
3268      // A file: URL that doesn't include a slash will be interpreted as a
3269      // relative path.
3270      path = relativeBasePath + urlString.substring(5);
3271    }
3272    else
3273    {
3274      throw new LDIFException(ERR_READ_URL_INVALID_SCHEME.get(urlString),
3275           firstLineNumber, true);
3276    }
3277
3278    final File f = new File(path);
3279    if (! f.exists())
3280    {
3281      throw new LDIFException(
3282           ERR_READ_URL_NO_SUCH_FILE.get(urlString, f.getAbsolutePath()),
3283           firstLineNumber, true);
3284    }
3285
3286    // In order to conserve memory, we'll only allow values to be read from
3287    // files no larger than 10 megabytes.
3288    final long fileSize = f.length();
3289    if (fileSize > (10 * 1024 * 1024))
3290    {
3291      throw new LDIFException(
3292           ERR_READ_URL_FILE_TOO_LARGE.get(urlString, f.getAbsolutePath(),
3293                (10*1024*1024)),
3294           firstLineNumber, true);
3295    }
3296
3297    int fileBytesRemaining = (int) fileSize;
3298    final byte[] fileData = new byte[(int) fileSize];
3299    final FileInputStream fis = new FileInputStream(f);
3300    try
3301    {
3302      int fileBytesRead = 0;
3303      while (fileBytesRead < fileSize)
3304      {
3305        final int bytesRead =
3306             fis.read(fileData, fileBytesRead, fileBytesRemaining);
3307        if (bytesRead < 0)
3308        {
3309          // We hit the end of the file before we expected to.  This shouldn't
3310          // happen unless the file size changed since we first looked at it,
3311          // which we won't allow.
3312          throw new LDIFException(
3313               ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString,
3314                    f.getAbsolutePath()),
3315               firstLineNumber, true);
3316        }
3317
3318        fileBytesRead      += bytesRead;
3319        fileBytesRemaining -= bytesRead;
3320      }
3321
3322      if (fis.read() != -1)
3323      {
3324        // There is still more data to read.  This shouldn't happen unless the
3325        // file size changed since we first looked at it, which we won't allow.
3326        throw new LDIFException(
3327             ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, f.getAbsolutePath()),
3328             firstLineNumber, true);
3329      }
3330    }
3331    finally
3332    {
3333      fis.close();
3334    }
3335
3336    return fileData;
3337  }
3338
3339
3340
3341  /**
3342   * Parses the data available through the provided iterator into an array of
3343   * modifications suitable for use in a modify change record.
3344   *
3345   * @param  dn                     The DN of the entry being parsed.
3346   * @param  trailingSpaceBehavior  The behavior that should be exhibited when
3347   *                                encountering attribute values which are not
3348   *                                base64-encoded but contain trailing spaces.
3349   * @param  ldifLines              The lines that comprise the LDIF
3350   *                                representation of the full record being
3351   *                                parsed.
3352   * @param  iterator               The iterator to use to access the
3353   *                                modification data.
3354   * @param  firstLineNumber        The line number for the start of the record.
3355   * @param  schema                 The schema to use in processing.
3356   *
3357   * @return  An array containing the modifications that were read.
3358   *
3359   * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
3360   *                         set of modifications.
3361   */
3362  private static Modification[] parseModifications(final String dn,
3363                      final TrailingSpaceBehavior trailingSpaceBehavior,
3364                      final ArrayList<StringBuilder> ldifLines,
3365                      final Iterator<StringBuilder> iterator,
3366                      final long firstLineNumber, final Schema schema)
3367          throws LDIFException
3368  {
3369    final ArrayList<Modification> modList =
3370         new ArrayList<Modification>(ldifLines.size());
3371
3372    while (iterator.hasNext())
3373    {
3374      // The first line must start with "add:", "delete:", "replace:", or
3375      // "increment:" followed by an attribute name.
3376      StringBuilder line = iterator.next();
3377      handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3378      int colonPos = line.indexOf(":");
3379      if (colonPos < 0)
3380      {
3381        throw new LDIFException(ERR_READ_MOD_CR_NO_MODTYPE.get(firstLineNumber),
3382                                firstLineNumber, true, ldifLines, null);
3383      }
3384
3385      final ModificationType modType;
3386      final String modTypeStr = toLowerCase(line.substring(0, colonPos));
3387      if (modTypeStr.equals("add"))
3388      {
3389        modType = ModificationType.ADD;
3390      }
3391      else if (modTypeStr.equals("delete"))
3392      {
3393        modType = ModificationType.DELETE;
3394      }
3395      else if (modTypeStr.equals("replace"))
3396      {
3397        modType = ModificationType.REPLACE;
3398      }
3399      else if (modTypeStr.equals("increment"))
3400      {
3401        modType = ModificationType.INCREMENT;
3402      }
3403      else
3404      {
3405        throw new LDIFException(ERR_READ_MOD_CR_INVALID_MODTYPE.get(modTypeStr,
3406                                     firstLineNumber),
3407                                firstLineNumber, true, ldifLines, null);
3408      }
3409
3410      String attributeName;
3411      int length = line.length();
3412      if (length == (colonPos+1))
3413      {
3414        // The colon was the last character on the line.  This is not
3415        // acceptable.
3416        throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get(
3417                                     firstLineNumber),
3418                                firstLineNumber, true, ldifLines, null);
3419      }
3420      else if (line.charAt(colonPos+1) == ':')
3421      {
3422        // Skip over any spaces leading up to the value, and then the rest of
3423        // the string is the base64-encoded attribute name.
3424        int pos = colonPos+2;
3425        while ((pos < length) && (line.charAt(pos) == ' '))
3426        {
3427          pos++;
3428        }
3429
3430        try
3431        {
3432          final byte[] dnBytes = Base64.decode(line.substring(pos));
3433          attributeName = new String(dnBytes, "UTF-8");
3434        }
3435        catch (final ParseException pe)
3436        {
3437          debugException(pe);
3438          throw new LDIFException(
3439               ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get(
3440                    firstLineNumber, pe.getMessage()),
3441               firstLineNumber, true, ldifLines, pe);
3442        }
3443        catch (final Exception e)
3444        {
3445          debugException(e);
3446          throw new LDIFException(
3447               ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get(
3448                    firstLineNumber, e),
3449               firstLineNumber, true, ldifLines, e);
3450        }
3451      }
3452      else
3453      {
3454        // Skip over any spaces leading up to the value, and then the rest of
3455        // the string is the attribute name.
3456        int pos = colonPos+1;
3457        while ((pos < length) && (line.charAt(pos) == ' '))
3458        {
3459          pos++;
3460        }
3461
3462        attributeName = line.substring(pos);
3463      }
3464
3465      if (attributeName.length() == 0)
3466      {
3467        throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get(
3468                                     firstLineNumber),
3469                                firstLineNumber, true, ldifLines, null);
3470      }
3471
3472
3473      // The next zero or more lines may be the set of attribute values.  Keep
3474      // reading until we reach the end of the iterator or until we find a line
3475      // with just a "-".
3476      final ArrayList<ASN1OctetString> valueList =
3477           new ArrayList<ASN1OctetString>(ldifLines.size());
3478      while (iterator.hasNext())
3479      {
3480        line = iterator.next();
3481        handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3482        if (line.toString().equals("-"))
3483        {
3484          break;
3485        }
3486
3487        colonPos = line.indexOf(":");
3488        if (colonPos < 0)
3489        {
3490          throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber),
3491                                  firstLineNumber, true, ldifLines, null);
3492        }
3493        else if (! line.substring(0, colonPos).equalsIgnoreCase(attributeName))
3494        {
3495          // There are a couple of cases in which this might be acceptable:
3496          // - If the two names are logically equivalent, but have an alternate
3497          //   name (or OID) for the target attribute type, or if there are
3498          //   attribute options and the options are just in a different order.
3499          // - If this is the first value for the target attribute and the
3500          //   alternate name includes a "binary" option that the original
3501          //   attribute name did not have.  In this case, all subsequent values
3502          //   will also be required to have the binary option.
3503          final String alternateName = line.substring(0, colonPos);
3504
3505
3506          // Check to see if the base names are equivalent.
3507          boolean baseNameEquivalent = false;
3508          final String expectedBaseName = Attribute.getBaseName(attributeName);
3509          final String alternateBaseName = Attribute.getBaseName(alternateName);
3510          if (alternateBaseName.equalsIgnoreCase(expectedBaseName))
3511          {
3512            baseNameEquivalent = true;
3513          }
3514          else
3515          {
3516            if (schema != null)
3517            {
3518              final AttributeTypeDefinition expectedAT =
3519                   schema.getAttributeType(expectedBaseName);
3520              final AttributeTypeDefinition alternateAT =
3521                   schema.getAttributeType(alternateBaseName);
3522              if ((expectedAT != null) && (alternateAT != null) &&
3523                  expectedAT.equals(alternateAT))
3524              {
3525                baseNameEquivalent = true;
3526              }
3527            }
3528          }
3529
3530
3531          // Check to see if the attribute options are equivalent.
3532          final Set<String> expectedOptions =
3533               Attribute.getOptions(attributeName);
3534          final Set<String> lowerExpectedOptions =
3535               new HashSet<String>(expectedOptions.size());
3536          for (final String s : expectedOptions)
3537          {
3538            lowerExpectedOptions.add(toLowerCase(s));
3539          }
3540
3541          final Set<String> alternateOptions =
3542               Attribute.getOptions(alternateName);
3543          final Set<String> lowerAlternateOptions =
3544               new HashSet<String>(alternateOptions.size());
3545          for (final String s : alternateOptions)
3546          {
3547            lowerAlternateOptions.add(toLowerCase(s));
3548          }
3549
3550          final boolean optionsEquivalent =
3551               lowerAlternateOptions.equals(lowerExpectedOptions);
3552
3553
3554          if (baseNameEquivalent && optionsEquivalent)
3555          {
3556            // This is fine.  The two attribute descriptions are logically
3557            // equivalent.  We'll continue using the attribute description that
3558            // was provided first.
3559          }
3560          else if (valueList.isEmpty() && baseNameEquivalent &&
3561                   lowerAlternateOptions.remove("binary") &&
3562                   lowerAlternateOptions.equals(lowerExpectedOptions))
3563          {
3564            // This means that the provided value is the first value for the
3565            // attribute, and that the only significant difference is that the
3566            // provided attribute description included an unexpected "binary"
3567            // option.  We'll accept this, but will require any additional
3568            // values for this modification to also include the binary option,
3569            // and we'll use the binary option in the attribute that is
3570            // eventually created.
3571            attributeName = alternateName;
3572          }
3573          else
3574          {
3575            // This means that either the base names are different or the sets
3576            // of options are incompatible.  This is not acceptable.
3577            throw new LDIFException(ERR_READ_MOD_CR_ATTR_MISMATCH.get(
3578                                         firstLineNumber,
3579                                         line.substring(0, colonPos),
3580                                         attributeName),
3581                                    firstLineNumber, true, ldifLines, null);
3582          }
3583        }
3584
3585        length = line.length();
3586        final ASN1OctetString value;
3587        if (length == (colonPos+1))
3588        {
3589          // The colon was the last character on the line.  This is fine.
3590          value = new ASN1OctetString();
3591        }
3592        else if (line.charAt(colonPos+1) == ':')
3593        {
3594          // Skip over any spaces leading up to the value, and then the rest of
3595          // the string is the base64-encoded value.  This is unusual and
3596          // unnecessary, but is nevertheless acceptable.
3597          int pos = colonPos+2;
3598          while ((pos < length) && (line.charAt(pos) == ' '))
3599          {
3600            pos++;
3601          }
3602
3603          try
3604          {
3605            value = new ASN1OctetString(Base64.decode(line.substring(pos)));
3606          }
3607          catch (final ParseException pe)
3608          {
3609            debugException(pe);
3610            throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
3611                 attributeName, firstLineNumber, pe.getMessage()),
3612                 firstLineNumber, true, ldifLines, pe);
3613          }
3614          catch (final Exception e)
3615          {
3616            debugException(e);
3617            throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get(
3618                                         firstLineNumber, e),
3619                                    firstLineNumber, true, ldifLines, e);
3620          }
3621        }
3622        else
3623        {
3624          // Skip over any spaces leading up to the value, and then the rest of
3625          // the string is the value.
3626          int pos = colonPos+1;
3627          while ((pos < length) && (line.charAt(pos) == ' '))
3628          {
3629            pos++;
3630          }
3631
3632          value = new ASN1OctetString(line.substring(pos));
3633        }
3634
3635        valueList.add(value);
3636      }
3637
3638      final ASN1OctetString[] values = new ASN1OctetString[valueList.size()];
3639      valueList.toArray(values);
3640
3641      // If it's an add modification type, then there must be at least one
3642      // value.
3643      if ((modType.intValue() == ModificationType.ADD.intValue()) &&
3644          (values.length == 0))
3645      {
3646        throw new LDIFException(ERR_READ_MOD_CR_NO_ADD_VALUES.get(attributeName,
3647                                     firstLineNumber),
3648                                firstLineNumber, true, ldifLines, null);
3649      }
3650
3651      // If it's an increment modification type, then there must be exactly one
3652      // value.
3653      if ((modType.intValue() == ModificationType.INCREMENT.intValue()) &&
3654          (values.length != 1))
3655      {
3656        throw new LDIFException(ERR_READ_MOD_CR_INVALID_INCR_VALUE_COUNT.get(
3657                                     firstLineNumber, attributeName),
3658                                firstLineNumber, true, ldifLines, null);
3659      }
3660
3661      modList.add(new Modification(modType, attributeName, values));
3662    }
3663
3664    final Modification[] mods = new Modification[modList.size()];
3665    modList.toArray(mods);
3666    return mods;
3667  }
3668
3669
3670
3671  /**
3672   * Parses the data available through the provided iterator as the body of a
3673   * modify DN change record (i.e., the newrdn, deleteoldrdn, and optional
3674   * newsuperior lines).
3675   *
3676   * @param  ldifLines              The lines that comprise the LDIF
3677   *                                representation of the full record being
3678   *                                parsed.
3679   * @param  iterator               The iterator to use to access the modify DN
3680   *                                data.
3681   * @param  dn                     The current DN of the entry.
3682   * @param  controls               The set of controls to include in the change
3683   *                                record.
3684   * @param  trailingSpaceBehavior  The behavior that should be exhibited when
3685   *                                encountering attribute values which are not
3686   *                                base64-encoded but contain trailing spaces.
3687   * @param  firstLineNumber        The line number for the start of the record.
3688   *
3689   * @return  The decoded modify DN change record.
3690   *
3691   * @throws  LDIFException  If the provided LDIF data cannot be decoded as a
3692   *                         modify DN change record.
3693   */
3694  private static LDIFModifyDNChangeRecord parseModifyDNChangeRecord(
3695       final ArrayList<StringBuilder> ldifLines,
3696       final Iterator<StringBuilder> iterator, final String dn,
3697       final List<Control> controls,
3698       final TrailingSpaceBehavior trailingSpaceBehavior,
3699       final long firstLineNumber)
3700       throws LDIFException
3701  {
3702    // The next line must be the new RDN, and it must start with "newrdn:".
3703    StringBuilder line = iterator.next();
3704    handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3705    int colonPos = line.indexOf(":");
3706    if ((colonPos < 0) ||
3707        (! line.substring(0, colonPos).equalsIgnoreCase("newrdn")))
3708    {
3709      throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_COLON.get(
3710                                   firstLineNumber),
3711                              firstLineNumber, true, ldifLines, null);
3712    }
3713
3714    final String newRDN;
3715    int length = line.length();
3716    if (length == (colonPos+1))
3717    {
3718      // The colon was the last character on the line.  This is not acceptable.
3719      throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get(
3720                                   firstLineNumber),
3721                              firstLineNumber, true, ldifLines, null);
3722    }
3723    else if (line.charAt(colonPos+1) == ':')
3724    {
3725      // Skip over any spaces leading up to the value, and then the rest of the
3726      // string is the base64-encoded new RDN.
3727      int pos = colonPos+2;
3728      while ((pos < length) && (line.charAt(pos) == ' '))
3729      {
3730        pos++;
3731      }
3732
3733      try
3734      {
3735        final byte[] dnBytes = Base64.decode(line.substring(pos));
3736        newRDN = new String(dnBytes, "UTF-8");
3737      }
3738      catch (final ParseException pe)
3739      {
3740        debugException(pe);
3741        throw new LDIFException(
3742             ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber,
3743                                                               pe.getMessage()),
3744             firstLineNumber, true, ldifLines, pe);
3745      }
3746      catch (final Exception e)
3747      {
3748        debugException(e);
3749        throw new LDIFException(
3750             ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber,
3751                                                               e),
3752             firstLineNumber, true, ldifLines, e);
3753      }
3754    }
3755    else
3756    {
3757      // Skip over any spaces leading up to the value, and then the rest of the
3758      // string is the new RDN.
3759      int pos = colonPos+1;
3760      while ((pos < length) && (line.charAt(pos) == ' '))
3761      {
3762        pos++;
3763      }
3764
3765      newRDN = line.substring(pos);
3766    }
3767
3768    if (newRDN.length() == 0)
3769    {
3770      throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get(
3771                                   firstLineNumber),
3772                              firstLineNumber, true, ldifLines, null);
3773    }
3774
3775
3776    // The next line must be the deleteOldRDN flag, and it must start with
3777    // 'deleteoldrdn:'.
3778    if (! iterator.hasNext())
3779    {
3780      throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get(
3781                                   firstLineNumber),
3782                              firstLineNumber, true, ldifLines, null);
3783    }
3784
3785    line = iterator.next();
3786    handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3787    colonPos = line.indexOf(":");
3788    if ((colonPos < 0) ||
3789        (! line.substring(0, colonPos).equalsIgnoreCase("deleteoldrdn")))
3790    {
3791      throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get(
3792                                   firstLineNumber),
3793                              firstLineNumber, true, ldifLines, null);
3794    }
3795
3796    final String deleteOldRDNStr;
3797    length = line.length();
3798    if (length == (colonPos+1))
3799    {
3800      // The colon was the last character on the line.  This is not acceptable.
3801      throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_VALUE.get(
3802                                   firstLineNumber),
3803                              firstLineNumber, true, ldifLines, null);
3804    }
3805    else if (line.charAt(colonPos+1) == ':')
3806    {
3807      // Skip over any spaces leading up to the value, and then the rest of the
3808      // string is the base64-encoded value.  This is unusual and
3809      // unnecessary, but is nevertheless acceptable.
3810      int pos = colonPos+2;
3811      while ((pos < length) && (line.charAt(pos) == ' '))
3812      {
3813        pos++;
3814      }
3815
3816      try
3817      {
3818        final byte[] changeTypeBytes = Base64.decode(line.substring(pos));
3819        deleteOldRDNStr = new String(changeTypeBytes, "UTF-8");
3820      }
3821      catch (final ParseException pe)
3822      {
3823        debugException(pe);
3824        throw new LDIFException(
3825             ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get(
3826                  firstLineNumber, pe.getMessage()),
3827             firstLineNumber, true, ldifLines, pe);
3828      }
3829      catch (final Exception e)
3830      {
3831        debugException(e);
3832        throw new LDIFException(
3833             ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get(
3834                  firstLineNumber, e),
3835             firstLineNumber, true, ldifLines, e);
3836      }
3837    }
3838    else
3839    {
3840      // Skip over any spaces leading up to the value, and then the rest of the
3841      // string is the value.
3842      int pos = colonPos+1;
3843      while ((pos < length) && (line.charAt(pos) == ' '))
3844      {
3845        pos++;
3846      }
3847
3848      deleteOldRDNStr = line.substring(pos);
3849    }
3850
3851    final boolean deleteOldRDN;
3852    if (deleteOldRDNStr.equals("0"))
3853    {
3854      deleteOldRDN = false;
3855    }
3856    else if (deleteOldRDNStr.equals("1"))
3857    {
3858      deleteOldRDN = true;
3859    }
3860    else if (deleteOldRDNStr.equalsIgnoreCase("false") ||
3861             deleteOldRDNStr.equalsIgnoreCase("no"))
3862    {
3863      // This is technically illegal, but we'll allow it.
3864      deleteOldRDN = false;
3865    }
3866    else if (deleteOldRDNStr.equalsIgnoreCase("true") ||
3867             deleteOldRDNStr.equalsIgnoreCase("yes"))
3868    {
3869      // This is also technically illegal, but we'll allow it.
3870      deleteOldRDN = false;
3871    }
3872    else
3873    {
3874      throw new LDIFException(ERR_READ_MODDN_CR_INVALID_DELOLDRDN.get(
3875                                   deleteOldRDNStr, firstLineNumber),
3876                              firstLineNumber, true, ldifLines, null);
3877    }
3878
3879
3880    // If there is another line, then it must be the new superior DN and it must
3881    // start with "newsuperior:".  If this is absent, then it's fine.
3882    final String newSuperiorDN;
3883    if (iterator.hasNext())
3884    {
3885      line = iterator.next();
3886      handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior);
3887      colonPos = line.indexOf(":");
3888      if ((colonPos < 0) ||
3889          (! line.substring(0, colonPos).equalsIgnoreCase("newsuperior")))
3890      {
3891        throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWSUPERIOR_COLON.get(
3892                                     firstLineNumber),
3893                                firstLineNumber, true, ldifLines, null);
3894      }
3895
3896      length = line.length();
3897      if (length == (colonPos+1))
3898      {
3899        // The colon was the last character on the line.  This is fine.
3900        newSuperiorDN = "";
3901      }
3902      else if (line.charAt(colonPos+1) == ':')
3903      {
3904        // Skip over any spaces leading up to the value, and then the rest of
3905        // the string is the base64-encoded new superior DN.
3906        int pos = colonPos+2;
3907        while ((pos < length) && (line.charAt(pos) == ' '))
3908        {
3909          pos++;
3910        }
3911
3912        try
3913        {
3914          final byte[] dnBytes = Base64.decode(line.substring(pos));
3915          newSuperiorDN = new String(dnBytes, "UTF-8");
3916        }
3917        catch (final ParseException pe)
3918        {
3919          debugException(pe);
3920          throw new LDIFException(
3921               ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get(
3922                    firstLineNumber, pe.getMessage()),
3923               firstLineNumber, true, ldifLines, pe);
3924        }
3925        catch (final Exception e)
3926        {
3927          debugException(e);
3928          throw new LDIFException(
3929               ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get(
3930                    firstLineNumber, e),
3931               firstLineNumber, true, ldifLines, e);
3932        }
3933      }
3934      else
3935      {
3936        // Skip over any spaces leading up to the value, and then the rest of
3937        // the string is the new superior DN.
3938        int pos = colonPos+1;
3939        while ((pos < length) && (line.charAt(pos) == ' '))
3940        {
3941          pos++;
3942        }
3943
3944        newSuperiorDN = line.substring(pos);
3945      }
3946    }
3947    else
3948    {
3949      newSuperiorDN = null;
3950    }
3951
3952
3953    // There must not be any more lines.
3954    if (iterator.hasNext())
3955    {
3956      throw new LDIFException(ERR_READ_CR_EXTRA_MODDN_DATA.get(firstLineNumber),
3957                              firstLineNumber, true, ldifLines, null);
3958    }
3959
3960    return new LDIFModifyDNChangeRecord(dn, newRDN, deleteOldRDN,
3961         newSuperiorDN, controls);
3962  }
3963
3964
3965
3966  /**
3967   * Examines the line contained in the provided buffer to determine whether it
3968   * may contain one or more illegal trailing spaces.  If it does, then those
3969   * spaces will either be stripped out or an exception will be thrown to
3970   * indicate that they are illegal.
3971   *
3972   * @param  buffer                 The buffer to be examined.
3973   * @param  dn                     The DN of the LDIF record being parsed.  It
3974   *                                may be {@code null} if the DN is not yet
3975   *                                known (e.g., because the provided line is
3976   *                                expected to contain that DN).
3977   * @param  firstLineNumber        The approximate line number in the LDIF
3978   *                                source on which the LDIF record begins.
3979   * @param  trailingSpaceBehavior  The behavior that should be exhibited when
3980   *                                encountering attribute values which are not
3981   *                                base64-encoded but contain trailing spaces.
3982   *
3983   * @throws  LDIFException  If the line contained in the provided buffer ends
3984   *                         with one or more illegal trailing spaces and
3985   *                         {@code stripTrailingSpaces} was provided with a
3986   *                         value of {@code false}.
3987   */
3988  private static void handleTrailingSpaces(final StringBuilder buffer,
3989                           final String dn, final long firstLineNumber,
3990                           final TrailingSpaceBehavior trailingSpaceBehavior)
3991          throws LDIFException
3992  {
3993    int pos = buffer.length() - 1;
3994    boolean trailingFound = false;
3995    while ((pos >= 0) && (buffer.charAt(pos) == ' '))
3996    {
3997      trailingFound = true;
3998      pos--;
3999    }
4000
4001    if (trailingFound && (buffer.charAt(pos) != ':'))
4002    {
4003      switch (trailingSpaceBehavior)
4004      {
4005        case STRIP:
4006          buffer.setLength(pos+1);
4007          break;
4008
4009        case REJECT:
4010          if (dn == null)
4011          {
4012            throw new LDIFException(
4013                 ERR_READ_ILLEGAL_TRAILING_SPACE_WITHOUT_DN.get(firstLineNumber,
4014                      buffer.toString()),
4015                 firstLineNumber, true);
4016          }
4017          else
4018          {
4019            throw new LDIFException(
4020                 ERR_READ_ILLEGAL_TRAILING_SPACE_WITH_DN.get(dn,
4021                      firstLineNumber, buffer.toString()),
4022                 firstLineNumber, true);
4023          }
4024
4025        case RETAIN:
4026        default:
4027          // No action will be taken.
4028          break;
4029      }
4030    }
4031  }
4032
4033
4034
4035  /**
4036   * This represents an unparsed LDIFRecord.  It stores the line number of the
4037   * first line of the record and each line of the record.
4038   */
4039  private static final class UnparsedLDIFRecord
4040  {
4041    private final ArrayList<StringBuilder> lineList;
4042    private final long firstLineNumber;
4043    private final Exception failureCause;
4044    private final boolean isEOF;
4045    private final DuplicateValueBehavior duplicateValueBehavior;
4046    private final Schema schema;
4047    private final TrailingSpaceBehavior trailingSpaceBehavior;
4048
4049
4050
4051    /**
4052     * Constructor.
4053     *
4054     * @param  lineList                The lines that comprise the LDIF record.
4055     * @param  duplicateValueBehavior  The behavior to exhibit if the entry
4056     *                                 contains duplicate attribute values.
4057     * @param  trailingSpaceBehavior   Specifies the behavior to exhibit when
4058     *                                 encountering trailing spaces in
4059     *                                 non-base64-encoded attribute values.
4060     * @param  schema                  The schema to use when parsing, if
4061     *                                 applicable.
4062     * @param  firstLineNumber         The first line number of the LDIF record.
4063     */
4064    private UnparsedLDIFRecord(final ArrayList<StringBuilder> lineList,
4065                 final DuplicateValueBehavior duplicateValueBehavior,
4066                 final TrailingSpaceBehavior trailingSpaceBehavior,
4067                 final Schema schema, final long firstLineNumber)
4068    {
4069      this.lineList               = lineList;
4070      this.firstLineNumber        = firstLineNumber;
4071      this.duplicateValueBehavior = duplicateValueBehavior;
4072      this.trailingSpaceBehavior  = trailingSpaceBehavior;
4073      this.schema                 = schema;
4074
4075      failureCause = null;
4076      isEOF =
4077           (firstLineNumber < 0) || ((lineList != null) && lineList.isEmpty());
4078    }
4079
4080
4081
4082    /**
4083     * Constructor.
4084     *
4085     * @param failureCause  The Exception thrown when reading from the input.
4086     */
4087    private UnparsedLDIFRecord(final Exception failureCause)
4088    {
4089      this.failureCause = failureCause;
4090
4091      lineList               = null;
4092      firstLineNumber        = 0;
4093      duplicateValueBehavior = DuplicateValueBehavior.REJECT;
4094      trailingSpaceBehavior  = TrailingSpaceBehavior.REJECT;
4095      schema                 = null;
4096      isEOF                  = false;
4097    }
4098
4099
4100
4101    /**
4102     * Return the lines that comprise the LDIF record.
4103     *
4104     * @return  The lines that comprise the LDIF record.
4105     */
4106    private ArrayList<StringBuilder> getLineList()
4107    {
4108      return lineList;
4109    }
4110
4111
4112
4113    /**
4114     * Retrieves the behavior to exhibit when encountering duplicate attribute
4115     * values.
4116     *
4117     * @return  The behavior to exhibit when encountering duplicate attribute
4118     *          values.
4119     */
4120    private DuplicateValueBehavior getDuplicateValueBehavior()
4121    {
4122      return duplicateValueBehavior;
4123    }
4124
4125
4126
4127    /**
4128     * Retrieves the behavior that should be exhibited when encountering
4129     * attribute values which are not base64-encoded but contain trailing
4130     * spaces.  The LDIF specification strongly recommends that any value which
4131     * legitimately contains trailing spaces be base64-encoded, but the LDAP SDK
4132     * LDIF parser may be configured to automatically strip these spaces, to
4133     * preserve them, or to reject any entry or change record containing them.
4134     *
4135     * @return  The behavior that should be exhibited when encountering
4136     *          attribute values which are not base64-encoded but contain
4137     *          trailing spaces.
4138     */
4139    private TrailingSpaceBehavior getTrailingSpaceBehavior()
4140    {
4141      return trailingSpaceBehavior;
4142    }
4143
4144
4145
4146    /**
4147     * Retrieves the schema that should be used when parsing the record, if
4148     * applicable.
4149     *
4150     * @return  The schema that should be used when parsing the record, or
4151     *          {@code null} if none should be used.
4152     */
4153    private Schema getSchema()
4154    {
4155      return schema;
4156    }
4157
4158
4159
4160    /**
4161     * Return the first line number of the LDIF record.
4162     *
4163     * @return  The first line number of the LDIF record.
4164     */
4165    private long getFirstLineNumber()
4166    {
4167      return firstLineNumber;
4168    }
4169
4170
4171
4172    /**
4173     * Return {@code true} iff the end of the input was reached.
4174     *
4175     * @return  {@code true} iff the end of the input was reached.
4176     */
4177    private boolean isEOF()
4178    {
4179      return isEOF;
4180    }
4181
4182
4183
4184    /**
4185     * Returns the reason that reading the record lines failed.  This normally
4186     * is only non-null if something bad happened to the input stream (like
4187     * a disk read error).
4188     *
4189     * @return  The reason that reading the record lines failed.
4190     */
4191    private Exception getFailureCause()
4192    {
4193      return failureCause;
4194    }
4195  }
4196
4197
4198  /**
4199   * When processing in asynchronous mode, this thread is responsible for
4200   * reading the raw unparsed records from the input and submitting them for
4201   * processing.
4202   */
4203  private final class LineReaderThread
4204       extends Thread
4205  {
4206    /**
4207     * Constructor.
4208     */
4209    private LineReaderThread()
4210    {
4211      super("Asynchronous LDIF line reader");
4212      setDaemon(true);
4213    }
4214
4215
4216
4217    /**
4218     * Reads raw, unparsed records from the input and submits them for
4219     * processing until the input is finished or closed.
4220     */
4221    @Override()
4222    public void run()
4223    {
4224      try
4225      {
4226        boolean stopProcessing = false;
4227        while (!stopProcessing)
4228        {
4229          UnparsedLDIFRecord unparsedRecord = null;
4230          try
4231          {
4232            unparsedRecord = readUnparsedRecord();
4233          }
4234          catch (IOException e)
4235          {
4236            debugException(e);
4237            unparsedRecord = new UnparsedLDIFRecord(e);
4238            stopProcessing = true;
4239          }
4240          catch (Exception e)
4241          {
4242            debugException(e);
4243            unparsedRecord = new UnparsedLDIFRecord(e);
4244          }
4245
4246          try
4247          {
4248            asyncParser.submit(unparsedRecord);
4249          }
4250          catch (InterruptedException e)
4251          {
4252            debugException(e);
4253            // If this thread is interrupted, then someone wants us to stop
4254            // processing, so that's what we'll do.
4255            stopProcessing = true;
4256          }
4257
4258          if ((unparsedRecord == null) || (unparsedRecord.isEOF()))
4259          {
4260            stopProcessing = true;
4261          }
4262        }
4263      }
4264      finally
4265      {
4266        try
4267        {
4268          asyncParser.shutdown();
4269        }
4270        catch (InterruptedException e)
4271        {
4272          debugException(e);
4273        }
4274        finally
4275        {
4276          asyncParsingComplete.set(true);
4277        }
4278      }
4279    }
4280  }
4281
4282
4283
4284  /**
4285   * Used to parse Records asynchronously.
4286   */
4287  private final class RecordParser implements Processor<UnparsedLDIFRecord,
4288                                                        LDIFRecord>
4289  {
4290    /**
4291     * {@inheritDoc}
4292     */
4293    public LDIFRecord process(final UnparsedLDIFRecord input)
4294           throws LDIFException
4295    {
4296      LDIFRecord record = decodeRecord(input, relativeBasePath, schema);
4297
4298      if ((record instanceof Entry) && (entryTranslator != null))
4299      {
4300        record = entryTranslator.translate((Entry) record,
4301             input.getFirstLineNumber());
4302
4303        if (record == null)
4304        {
4305          record = SKIP_ENTRY;
4306        }
4307      }
4308      if ((record instanceof LDIFChangeRecord) &&
4309          (changeRecordTranslator != null))
4310      {
4311        record = changeRecordTranslator.translate((LDIFChangeRecord) record,
4312             input.getFirstLineNumber());
4313
4314        if (record == null)
4315        {
4316          record = SKIP_ENTRY;
4317        }
4318      }
4319      return record;
4320    }
4321  }
4322}