001/* 002 * Copyright 2007-2016 UnboundID Corp. 003 * All Rights Reserved. 004 */ 005/* 006 * Copyright (C) 2008-2016 UnboundID Corp. 007 * 008 * This program is free software; you can redistribute it and/or modify 009 * it under the terms of the GNU General Public License (GPLv2 only) 010 * or the terms of the GNU Lesser General Public License (LGPLv2.1 only) 011 * as published by the Free Software Foundation. 012 * 013 * This program is distributed in the hope that it will be useful, 014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 016 * GNU General Public License for more details. 017 * 018 * You should have received a copy of the GNU General Public License 019 * along with this program; if not, see <http://www.gnu.org/licenses>. 020 */ 021package com.unboundid.ldif; 022 023 024 025import java.io.BufferedReader; 026import java.io.BufferedWriter; 027import java.io.File; 028import java.io.FileInputStream; 029import java.io.FileWriter; 030import java.io.InputStream; 031import java.io.InputStreamReader; 032import java.io.IOException; 033import java.text.ParseException; 034import java.util.ArrayList; 035import java.util.Collection; 036import java.util.Iterator; 037import java.util.HashSet; 038import java.util.LinkedHashMap; 039import java.util.List; 040import java.util.Set; 041import java.util.concurrent.BlockingQueue; 042import java.util.concurrent.ArrayBlockingQueue; 043import java.util.concurrent.TimeUnit; 044import java.util.concurrent.atomic.AtomicBoolean; 045import java.nio.charset.Charset; 046 047import com.unboundid.asn1.ASN1OctetString; 048import com.unboundid.ldap.matchingrules.CaseIgnoreStringMatchingRule; 049import com.unboundid.ldap.matchingrules.MatchingRule; 050import com.unboundid.ldap.sdk.Attribute; 051import com.unboundid.ldap.sdk.Control; 052import com.unboundid.ldap.sdk.Entry; 053import com.unboundid.ldap.sdk.Modification; 054import 
com.unboundid.ldap.sdk.ModificationType; 055import com.unboundid.ldap.sdk.LDAPException; 056import com.unboundid.ldap.sdk.schema.AttributeTypeDefinition; 057import com.unboundid.ldap.sdk.schema.Schema; 058import com.unboundid.util.AggregateInputStream; 059import com.unboundid.util.Base64; 060import com.unboundid.util.LDAPSDKThreadFactory; 061import com.unboundid.util.ThreadSafety; 062import com.unboundid.util.ThreadSafetyLevel; 063import com.unboundid.util.parallel.AsynchronousParallelProcessor; 064import com.unboundid.util.parallel.Result; 065import com.unboundid.util.parallel.ParallelProcessor; 066import com.unboundid.util.parallel.Processor; 067 068import static com.unboundid.ldif.LDIFMessages.*; 069import static com.unboundid.util.Debug.*; 070import static com.unboundid.util.StaticUtils.*; 071import static com.unboundid.util.Validator.*; 072 073/** 074 * This class provides an LDIF reader, which can be used to read and decode 075 * entries and change records from a data source using the LDAP Data Interchange 076 * Format as per <A HREF="http://www.ietf.org/rfc/rfc2849.txt">RFC 2849</A>. 077 * <BR> 078 * This class is not synchronized. If multiple threads read from the 079 * LDIFReader, they must be synchronized externally. 080 * <BR><BR> 081 * <H2>Example</H2> 082 * The following example iterates through all entries contained in an LDIF file 083 * and attempts to add them to a directory server: 084 * <PRE> 085 * LDIFReader ldifReader = new LDIFReader(pathToLDIFFile); 086 * 087 * int entriesRead = 0; 088 * int entriesAdded = 0; 089 * int errorsEncountered = 0; 090 * while (true) 091 * { 092 * Entry entry; 093 * try 094 * { 095 * entry = ldifReader.readEntry(); 096 * if (entry == null) 097 * { 098 * // All entries have been read. 
099 * break; 100 * } 101 * 102 * entriesRead++; 103 * } 104 * catch (LDIFException le) 105 * { 106 * errorsEncountered++; 107 * if (le.mayContinueReading()) 108 * { 109 * // A recoverable error occurred while attempting to read a change 110 * // record, at or near line number le.getLineNumber() 111 * // The entry will be skipped, but we'll try to keep reading from the 112 * // LDIF file. 113 * continue; 114 * } 115 * else 116 * { 117 * // An unrecoverable error occurred while attempting to read an entry 118 * // at or near line number le.getLineNumber() 119 * // No further LDIF processing will be performed. 120 * break; 121 * } 122 * } 123 * catch (IOException ioe) 124 * { 125 * // An I/O error occurred while attempting to read from the LDIF file. 126 * // No further LDIF processing will be performed. 127 * errorsEncountered++; 128 * break; 129 * } 130 * 131 * LDAPResult addResult; 132 * try 133 * { 134 * addResult = connection.add(entry); 135 * // If we got here, then the change should have been processed 136 * // successfully. 137 * entriesAdded++; 138 * } 139 * catch (LDAPException le) 140 * { 141 * // If we got here, then the change attempt failed. 142 * addResult = le.toLDAPResult(); 143 * errorsEncountered++; 144 * } 145 * } 146 * 147 * ldifReader.close(); 148 * </PRE> 149 */ 150@ThreadSafety(level=ThreadSafetyLevel.NOT_THREADSAFE) 151public final class LDIFReader 152{ 153 /** 154 * The default buffer size (128KB) that will be used when reading from the 155 * data source. 156 */ 157 public static final int DEFAULT_BUFFER_SIZE = 128 * 1024; 158 159 160 161 /* 162 * When processing asynchronously, this determines how many of the allocated 163 * worker threads are used to parse each batch of read entries. 164 */ 165 private static final int ASYNC_MIN_PER_PARSING_THREAD = 3; 166 167 168 169 /** 170 * When processing asynchronously, this specifies the size of the pending and 171 * completed queues. 
172 */ 173 private static final int ASYNC_QUEUE_SIZE = 500; 174 175 176 177 /** 178 * Special entry used internally to signal that the LDIFReaderEntryTranslator 179 * has signalled that a read Entry should be skipped by returning null, 180 * which normally implies EOF. 181 */ 182 private static final Entry SKIP_ENTRY = new Entry("cn=skipped"); 183 184 185 186 /** 187 * The default base path that will be prepended to relative paths. It will 188 * end with a trailing slash. 189 */ 190 private static final String DEFAULT_RELATIVE_BASE_PATH; 191 static 192 { 193 final File currentDir; 194 String currentDirString = System.getProperty("user.dir"); 195 if (currentDirString == null) 196 { 197 currentDir = new File("."); 198 } 199 else 200 { 201 currentDir = new File(currentDirString); 202 } 203 204 final String currentDirAbsolutePath = currentDir.getAbsolutePath(); 205 if (currentDirAbsolutePath.endsWith(File.separator)) 206 { 207 DEFAULT_RELATIVE_BASE_PATH = currentDirAbsolutePath; 208 } 209 else 210 { 211 DEFAULT_RELATIVE_BASE_PATH = currentDirAbsolutePath + File.separator; 212 } 213 } 214 215 216 217 // The buffered reader that will be used to read LDIF data. 218 private final BufferedReader reader; 219 220 // The behavior that should be exhibited when encountering duplicate attribute 221 // values. 222 private volatile DuplicateValueBehavior duplicateValueBehavior; 223 224 // A line number counter. 225 private long lineNumberCounter = 0; 226 227 // The change record translator to use, if any. 228 private final LDIFReaderChangeRecordTranslator changeRecordTranslator; 229 230 // The entry translator to use, if any. 231 private final LDIFReaderEntryTranslator entryTranslator; 232 233 // The schema that will be used when processing, if applicable. 234 private Schema schema; 235 236 // Specifies the base path that will be prepended to relative paths for file 237 // URLs. 
238 private volatile String relativeBasePath; 239 240 // The behavior that should be exhibited with regard to illegal trailing 241 // spaces in attribute values. 242 private volatile TrailingSpaceBehavior trailingSpaceBehavior; 243 244 // True iff we are processing asynchronously. 245 private final boolean isAsync; 246 247 // 248 // The following only apply to asynchronous processing. 249 // 250 251 // Parses entries asynchronously. 252 private final AsynchronousParallelProcessor<UnparsedLDIFRecord, LDIFRecord> 253 asyncParser; 254 255 // Set to true when the end of the input is reached. 256 private final AtomicBoolean asyncParsingComplete; 257 258 // The records that have been read and parsed. 259 private final BlockingQueue<Result<UnparsedLDIFRecord, LDIFRecord>> 260 asyncParsedRecords; 261 262 263 264 /** 265 * Creates a new LDIF reader that will read data from the specified file. 266 * 267 * @param path The path to the file from which the data is to be read. It 268 * must not be {@code null}. 269 * 270 * @throws IOException If a problem occurs while opening the file for 271 * reading. 272 */ 273 public LDIFReader(final String path) 274 throws IOException 275 { 276 this(new FileInputStream(path)); 277 } 278 279 280 281 /** 282 * Creates a new LDIF reader that will read data from the specified file 283 * and parses the LDIF records asynchronously using the specified number of 284 * threads. 285 * 286 * @param path The path to the file from which the data is to be read. It 287 * must not be {@code null}. 288 * @param numParseThreads If this value is greater than zero, then the 289 * specified number of threads will be used to 290 * asynchronously read and parse the LDIF file. 291 * 292 * @throws IOException If a problem occurs while opening the file for 293 * reading. 294 * 295 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 296 * constructor for more details about asynchronous processing. 
297 */ 298 public LDIFReader(final String path, final int numParseThreads) 299 throws IOException 300 { 301 this(new FileInputStream(path), numParseThreads); 302 } 303 304 305 306 /** 307 * Creates a new LDIF reader that will read data from the specified file. 308 * 309 * @param file The file from which the data is to be read. It must not be 310 * {@code null}. 311 * 312 * @throws IOException If a problem occurs while opening the file for 313 * reading. 314 */ 315 public LDIFReader(final File file) 316 throws IOException 317 { 318 this(new FileInputStream(file)); 319 } 320 321 322 323 /** 324 * Creates a new LDIF reader that will read data from the specified file 325 * and optionally parses the LDIF records asynchronously using the specified 326 * number of threads. 327 * 328 * @param file The file from which the data is to be read. It 329 * must not be {@code null}. 330 * @param numParseThreads If this value is greater than zero, then the 331 * specified number of threads will be used to 332 * asynchronously read and parse the LDIF file. 333 * 334 * @throws IOException If a problem occurs while opening the file for 335 * reading. 336 */ 337 public LDIFReader(final File file, final int numParseThreads) 338 throws IOException 339 { 340 this(new FileInputStream(file), numParseThreads); 341 } 342 343 344 345 /** 346 * Creates a new LDIF reader that will read data from the specified files in 347 * the order in which they are provided and optionally parses the LDIF records 348 * asynchronously using the specified number of threads. 349 * 350 * @param files The files from which the data is to be read. It 351 * must not be {@code null} or empty. 352 * @param numParseThreads If this value is greater than zero, then the 353 * specified number of threads will be used to 354 * asynchronously read and parse the LDIF file. 355 * @param entryTranslator The LDIFReaderEntryTranslator to apply to entries 356 * before they are returned. 
This is normally 357 * {@code null}, which causes entries to be returned 358 * unaltered. This is particularly useful when 359 * parsing the input file in parallel because the 360 * entry translation is also done in parallel. 361 * 362 * @throws IOException If a problem occurs while opening the file for 363 * reading. 364 */ 365 public LDIFReader(final File[] files, final int numParseThreads, 366 final LDIFReaderEntryTranslator entryTranslator) 367 throws IOException 368 { 369 this(files, numParseThreads, entryTranslator, null); 370 } 371 372 373 374 /** 375 * Creates a new LDIF reader that will read data from the specified files in 376 * the order in which they are provided and optionally parses the LDIF records 377 * asynchronously using the specified number of threads. 378 * 379 * @param files The files from which the data is to be 380 * read. It must not be {@code null} or 381 * empty. 382 * @param numParseThreads If this value is greater than zero, then 383 * the specified number of threads will be 384 * used to asynchronously read and parse the 385 * LDIF file. 386 * @param entryTranslator The LDIFReaderEntryTranslator to apply to 387 * entries before they are returned. This is 388 * normally {@code null}, which causes entries 389 * to be returned unaltered. This is 390 * particularly useful when parsing the input 391 * file in parallel because the entry 392 * translation is also done in parallel. 393 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to 394 * apply to change records before they are 395 * returned. This is normally {@code null}, 396 * which causes change records to be returned 397 * unaltered. This is particularly useful 398 * when parsing the input file in parallel 399 * because the change record translation is 400 * also done in parallel. 401 * 402 * @throws IOException If a problem occurs while opening the file for 403 * reading. 
404 */ 405 public LDIFReader(final File[] files, final int numParseThreads, 406 final LDIFReaderEntryTranslator entryTranslator, 407 final LDIFReaderChangeRecordTranslator changeRecordTranslator) 408 throws IOException 409 { 410 this(createAggregateInputStream(files), numParseThreads, entryTranslator, 411 changeRecordTranslator); 412 } 413 414 415 416 /** 417 * Creates a new aggregate input stream that will read data from the specified 418 * files. If there are multiple files, then a "padding" file will be inserted 419 * between them to ensure that there is at least one blank line between the 420 * end of one file and the beginning of another. 421 * 422 * @param files The files from which the data is to be read. It must not be 423 * {@code null} or empty. 424 * 425 * @return The input stream to use to read data from the provided files. 426 * 427 * @throws IOException If a problem is encountered while attempting to 428 * create the input stream. 429 */ 430 private static InputStream createAggregateInputStream(final File... 
files) 431 throws IOException 432 { 433 if (files.length == 0) 434 { 435 throw new IOException(ERR_READ_NO_LDIF_FILES.get()); 436 } 437 else if (files.length == 1) 438 { 439 return new FileInputStream(files[0]); 440 } 441 else 442 { 443 final File spacerFile = 444 File.createTempFile("ldif-reader-spacer", ".ldif"); 445 spacerFile.deleteOnExit(); 446 447 final BufferedWriter spacerWriter = 448 new BufferedWriter(new FileWriter(spacerFile)); 449 try 450 { 451 spacerWriter.newLine(); 452 spacerWriter.newLine(); 453 } 454 finally 455 { 456 spacerWriter.close(); 457 } 458 459 final File[] returnArray = new File[(files.length * 2) - 1]; 460 returnArray[0] = files[0]; 461 462 int pos = 1; 463 for (int i=1; i < files.length; i++) 464 { 465 returnArray[pos++] = spacerFile; 466 returnArray[pos++] = files[i]; 467 } 468 469 return new AggregateInputStream(returnArray); 470 } 471 } 472 473 474 475 /** 476 * Creates a new LDIF reader that will read data from the provided input 477 * stream. 478 * 479 * @param inputStream The input stream from which the data is to be read. 480 * It must not be {@code null}. 481 */ 482 public LDIFReader(final InputStream inputStream) 483 { 484 this(inputStream, 0); 485 } 486 487 488 489 /** 490 * Creates a new LDIF reader that will read data from the specified stream 491 * and parses the LDIF records asynchronously using the specified number of 492 * threads. 493 * 494 * @param inputStream The input stream from which the data is to be read. 495 * It must not be {@code null}. 496 * @param numParseThreads If this value is greater than zero, then the 497 * specified number of threads will be used to 498 * asynchronously read and parse the LDIF file. 499 * 500 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 501 * constructor for more details about asynchronous processing. 502 */ 503 public LDIFReader(final InputStream inputStream, final int numParseThreads) 504 { 505 // UTF-8 is required by RFC 2849. 
Java guarantees it's always available. 506 this(new BufferedReader(new InputStreamReader(inputStream, 507 Charset.forName("UTF-8")), 508 DEFAULT_BUFFER_SIZE), 509 numParseThreads); 510 } 511 512 513 514 /** 515 * Creates a new LDIF reader that will read data from the specified stream 516 * and parses the LDIF records asynchronously using the specified number of 517 * threads. 518 * 519 * @param inputStream The input stream from which the data is to be read. 520 * It must not be {@code null}. 521 * @param numParseThreads If this value is greater than zero, then the 522 * specified number of threads will be used to 523 * asynchronously read and parse the LDIF file. 524 * @param entryTranslator The LDIFReaderEntryTranslator to apply to read 525 * entries before they are returned. This is normally 526 * {@code null}, which causes entries to be returned 527 * unaltered. This is particularly useful when parsing 528 * the input file in parallel because the entry 529 * translation is also done in parallel. 530 * 531 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 532 * constructor for more details about asynchronous processing. 533 */ 534 public LDIFReader(final InputStream inputStream, final int numParseThreads, 535 final LDIFReaderEntryTranslator entryTranslator) 536 { 537 this(inputStream, numParseThreads, entryTranslator, null); 538 } 539 540 541 542 /** 543 * Creates a new LDIF reader that will read data from the specified stream 544 * and parses the LDIF records asynchronously using the specified number of 545 * threads. 546 * 547 * @param inputStream The input stream from which the data is to 548 * be read. It must not be {@code null}. 549 * @param numParseThreads If this value is greater than zero, then 550 * the specified number of threads will be 551 * used to asynchronously read and parse the 552 * LDIF file. 553 * @param entryTranslator The LDIFReaderEntryTranslator to apply to 554 * entries before they are returned. 
This is 555 * normally {@code null}, which causes entries 556 * to be returned unaltered. This is 557 * particularly useful when parsing the input 558 * file in parallel because the entry 559 * translation is also done in parallel. 560 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to 561 * apply to change records before they are 562 * returned. This is normally {@code null}, 563 * which causes change records to be returned 564 * unaltered. This is particularly useful 565 * when parsing the input file in parallel 566 * because the change record translation is 567 * also done in parallel. 568 * 569 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 570 * constructor for more details about asynchronous processing. 571 */ 572 public LDIFReader(final InputStream inputStream, final int numParseThreads, 573 final LDIFReaderEntryTranslator entryTranslator, 574 final LDIFReaderChangeRecordTranslator changeRecordTranslator) 575 { 576 // UTF-8 is required by RFC 2849. Java guarantees it's always available. 577 this(new BufferedReader( 578 new InputStreamReader(inputStream, Charset.forName("UTF-8")), 579 DEFAULT_BUFFER_SIZE), 580 numParseThreads, entryTranslator, changeRecordTranslator); 581 } 582 583 584 585 /** 586 * Creates a new LDIF reader that will use the provided buffered reader to 587 * read the LDIF data. The encoding of the underlying Reader must be set to 588 * "UTF-8" as required by RFC 2849. 589 * 590 * @param reader The buffered reader that will be used to read the LDIF 591 * data. It must not be {@code null}. 592 */ 593 public LDIFReader(final BufferedReader reader) 594 { 595 this(reader, 0); 596 } 597 598 599 600 /** 601 * Creates a new LDIF reader that will read data from the specified buffered 602 * reader and parses the LDIF records asynchronously using the specified 603 * number of threads. The encoding of the underlying Reader must be set to 604 * "UTF-8" as required by RFC 2849. 
605 * 606 * @param reader The buffered reader that will be used to read the LDIF data. 607 * It must not be {@code null}. 608 * @param numParseThreads If this value is greater than zero, then the 609 * specified number of threads will be used to 610 * asynchronously read and parse the LDIF file. 611 * 612 * @see #LDIFReader(BufferedReader, int, LDIFReaderEntryTranslator) 613 * constructor for more details about asynchronous processing. 614 */ 615 public LDIFReader(final BufferedReader reader, final int numParseThreads) 616 { 617 this(reader, numParseThreads, null); 618 } 619 620 621 622 /** 623 * Creates a new LDIF reader that will read data from the specified buffered 624 * reader and parses the LDIF records asynchronously using the specified 625 * number of threads. The encoding of the underlying Reader must be set to 626 * "UTF-8" as required by RFC 2849. 627 * 628 * @param reader The buffered reader that will be used to read the LDIF data. 629 * It must not be {@code null}. 630 * @param numParseThreads If this value is greater than zero, then the 631 * specified number of threads will be used to 632 * asynchronously read and parse the LDIF file. 633 * This should only be set to greater than zero when 634 * performance analysis has demonstrated that reading 635 * and parsing the LDIF is a bottleneck. The default 636 * synchronous processing is normally fast enough. 637 * There is little benefit in passing in a value 638 * greater than four (unless there is an 639 * LDIFReaderEntryTranslator that does time-consuming 640 * processing). A value of zero implies the 641 * default behavior of reading and parsing LDIF 642 * records synchronously when one of the read 643 * methods is called. 644 * @param entryTranslator The LDIFReaderEntryTranslator to apply to read 645 * entries before they are returned. This is normally 646 * {@code null}, which causes entries to be returned 647 * unaltered. 
This is particularly useful when parsing 648 * the input file in parallel because the entry 649 * translation is also done in parallel. 650 */ 651 public LDIFReader(final BufferedReader reader, 652 final int numParseThreads, 653 final LDIFReaderEntryTranslator entryTranslator) 654 { 655 this(reader, numParseThreads, entryTranslator, null); 656 } 657 658 659 660 /** 661 * Creates a new LDIF reader that will read data from the specified buffered 662 * reader and parses the LDIF records asynchronously using the specified 663 * number of threads. The encoding of the underlying Reader must be set to 664 * "UTF-8" as required by RFC 2849. 665 * 666 * @param reader The buffered reader that will be used to 667 * read the LDIF data. It must not be 668 * {@code null}. 669 * @param numParseThreads If this value is greater than zero, then 670 * the specified number of threads will be 671 * used to asynchronously read and parse the 672 * LDIF file. 673 * @param entryTranslator The LDIFReaderEntryTranslator to apply to 674 * entries before they are returned. This is 675 * normally {@code null}, which causes entries 676 * to be returned unaltered. This is 677 * particularly useful when parsing the input 678 * file in parallel because the entry 679 * translation is also done in parallel. 680 * @param changeRecordTranslator The LDIFReaderChangeRecordTranslator to 681 * apply to change records before they are 682 * returned. This is normally {@code null}, 683 * which causes change records to be returned 684 * unaltered. This is particularly useful 685 * when parsing the input file in parallel 686 * because the change record translation is 687 * also done in parallel. 
688 */ 689 public LDIFReader(final BufferedReader reader, final int numParseThreads, 690 final LDIFReaderEntryTranslator entryTranslator, 691 final LDIFReaderChangeRecordTranslator changeRecordTranslator) 692 { 693 ensureNotNull(reader); 694 ensureTrue(numParseThreads >= 0, 695 "LDIFReader.numParseThreads must not be negative."); 696 697 this.reader = reader; 698 this.entryTranslator = entryTranslator; 699 this.changeRecordTranslator = changeRecordTranslator; 700 701 duplicateValueBehavior = DuplicateValueBehavior.STRIP; 702 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT; 703 704 relativeBasePath = DEFAULT_RELATIVE_BASE_PATH; 705 706 if (numParseThreads == 0) 707 { 708 isAsync = false; 709 asyncParser = null; 710 asyncParsingComplete = null; 711 asyncParsedRecords = null; 712 } 713 else 714 { 715 isAsync = true; 716 asyncParsingComplete = new AtomicBoolean(false); 717 718 // Decodes entries in parallel. 719 final LDAPSDKThreadFactory threadFactory = 720 new LDAPSDKThreadFactory("LDIFReader Worker", true, null); 721 final ParallelProcessor<UnparsedLDIFRecord, LDIFRecord> parallelParser = 722 new ParallelProcessor<UnparsedLDIFRecord, LDIFRecord>( 723 new RecordParser(), threadFactory, numParseThreads, 724 ASYNC_MIN_PER_PARSING_THREAD); 725 726 final BlockingQueue<UnparsedLDIFRecord> pendingQueue = new 727 ArrayBlockingQueue<UnparsedLDIFRecord>(ASYNC_QUEUE_SIZE); 728 729 // The output queue must be a little more than twice as big as the input 730 // queue to more easily handle being shutdown in the middle of processing 731 // when the queues are full and threads are blocked. 
732 asyncParsedRecords = new ArrayBlockingQueue 733 <Result<UnparsedLDIFRecord, LDIFRecord>>(2 * ASYNC_QUEUE_SIZE + 100); 734 735 asyncParser = new AsynchronousParallelProcessor 736 <UnparsedLDIFRecord, LDIFRecord>(pendingQueue, parallelParser, 737 asyncParsedRecords); 738 739 final LineReaderThread lineReaderThread = new LineReaderThread(); 740 lineReaderThread.start(); 741 } 742 } 743 744 745 746 /** 747 * Reads entries from the LDIF file with the specified path and returns them 748 * as a {@code List}. This is a convenience method that should only be used 749 * for data sets that are small enough so that running out of memory isn't a 750 * concern. 751 * 752 * @param path The path to the LDIF file containing the entries to be read. 753 * 754 * @return A list of the entries read from the given LDIF file. 755 * 756 * @throws IOException If a problem occurs while attempting to read data 757 * from the specified file. 758 * 759 * @throws LDIFException If a problem is encountered while attempting to 760 * decode data read as LDIF. 761 */ 762 public static List<Entry> readEntries(final String path) 763 throws IOException, LDIFException 764 { 765 return readEntries(new LDIFReader(path)); 766 } 767 768 769 770 /** 771 * Reads entries from the specified LDIF file and returns them as a 772 * {@code List}. This is a convenience method that should only be used for 773 * data sets that are small enough so that running out of memory isn't a 774 * concern. 775 * 776 * @param file A reference to the LDIF file containing the entries to be 777 * read. 778 * 779 * @return A list of the entries read from the given LDIF file. 780 * 781 * @throws IOException If a problem occurs while attempting to read data 782 * from the specified file. 783 * 784 * @throws LDIFException If a problem is encountered while attempting to 785 * decode data read as LDIF. 
786 */ 787 public static List<Entry> readEntries(final File file) 788 throws IOException, LDIFException 789 { 790 return readEntries(new LDIFReader(file)); 791 } 792 793 794 795 /** 796 * Reads and decodes LDIF entries from the provided input stream and 797 * returns them as a {@code List}. This is a convenience method that should 798 * only be used for data sets that are small enough so that running out of 799 * memory isn't a concern. 800 * 801 * @param inputStream The input stream from which the entries should be 802 * read. The input stream will be closed before 803 * returning. 804 * 805 * @return A list of the entries read from the given input stream. 806 * 807 * @throws IOException If a problem occurs while attempting to read data 808 * from the input stream. 809 * 810 * @throws LDIFException If a problem is encountered while attempting to 811 * decode data read as LDIF. 812 */ 813 public static List<Entry> readEntries(final InputStream inputStream) 814 throws IOException, LDIFException 815 { 816 return readEntries(new LDIFReader(inputStream)); 817 } 818 819 820 821 /** 822 * Reads entries from the provided LDIF reader and returns them as a list. 823 * 824 * @param reader The reader from which the entries should be read. It will 825 * be closed before returning. 826 * 827 * @return A list of the entries read from the provided reader. 828 * 829 * @throws IOException If a problem was encountered while attempting to read 830 * data from the LDIF data source. 831 * 832 * @throws LDIFException If a problem is encountered while attempting to 833 * decode data read as LDIF. 
834 */ 835 private static List<Entry> readEntries(final LDIFReader reader) 836 throws IOException, LDIFException 837 { 838 try 839 { 840 final ArrayList<Entry> entries = new ArrayList<Entry>(10); 841 while (true) 842 { 843 final Entry e = reader.readEntry(); 844 if (e == null) 845 { 846 break; 847 } 848 849 entries.add(e); 850 } 851 852 return entries; 853 } 854 finally 855 { 856 reader.close(); 857 } 858 } 859 860 861 862 /** 863 * Closes this LDIF reader and the underlying LDIF source. 864 * 865 * @throws IOException If a problem occurs while closing the underlying LDIF 866 * source. 867 */ 868 public void close() 869 throws IOException 870 { 871 reader.close(); 872 873 if (isAsync()) 874 { 875 // Closing the reader will trigger the LineReaderThread to complete, but 876 // not if it's blocked submitting the next UnparsedLDIFRecord. To avoid 877 // this, we clear out the completed output queue, which is larger than 878 // the input queue, so the LineReaderThread will stop reading and 879 // shutdown the asyncParser. 880 asyncParsedRecords.clear(); 881 } 882 } 883 884 885 886 /** 887 * Indicates whether to ignore any duplicate values encountered while reading 888 * LDIF records. 889 * 890 * @return {@code true} if duplicate values should be ignored, or 891 * {@code false} if any LDIF records containing duplicate values 892 * should be rejected. 893 * 894 * @deprecated Use the {@link #getDuplicateValueBehavior} method instead. 895 */ 896 @Deprecated() 897 public boolean ignoreDuplicateValues() 898 { 899 return (duplicateValueBehavior == DuplicateValueBehavior.STRIP); 900 } 901 902 903 904 /** 905 * Specifies whether to ignore any duplicate values encountered while reading 906 * LDIF records. 907 * 908 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 909 * attribute values encountered while reading 910 * LDIF records. 911 * 912 * @deprecated Use the {@link #setDuplicateValueBehavior} method instead. 
913 */ 914 @Deprecated() 915 public void setIgnoreDuplicateValues(final boolean ignoreDuplicateValues) 916 { 917 if (ignoreDuplicateValues) 918 { 919 duplicateValueBehavior = DuplicateValueBehavior.STRIP; 920 } 921 else 922 { 923 duplicateValueBehavior = DuplicateValueBehavior.REJECT; 924 } 925 } 926 927 928 929 /** 930 * Retrieves the behavior that should be exhibited if the LDIF reader 931 * encounters an entry with duplicate values. 932 * 933 * @return The behavior that should be exhibited if the LDIF reader 934 * encounters an entry with duplicate values. 935 */ 936 public DuplicateValueBehavior getDuplicateValueBehavior() 937 { 938 return duplicateValueBehavior; 939 } 940 941 942 943 /** 944 * Specifies the behavior that should be exhibited if the LDIF reader 945 * encounters an entry with duplicate values. 946 * 947 * @param duplicateValueBehavior The behavior that should be exhibited if 948 * the LDIF reader encounters an entry with 949 * duplicate values. 950 */ 951 public void setDuplicateValueBehavior( 952 final DuplicateValueBehavior duplicateValueBehavior) 953 { 954 this.duplicateValueBehavior = duplicateValueBehavior; 955 } 956 957 958 959 /** 960 * Indicates whether to strip off any illegal trailing spaces that may appear 961 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF 962 * specification strongly recommends that any value which legitimately 963 * contains trailing spaces be base64-encoded, and any spaces which appear 964 * after the end of non-base64-encoded values may therefore be considered 965 * invalid. If any such trailing spaces are encountered in an LDIF record and 966 * they are not to be stripped, then an {@link LDIFException} will be thrown 967 * for that record. 968 * <BR><BR> 969 * Note that this applies only to spaces after the end of a value, and not to 970 * spaces which may appear at the end of a line for a value that is wrapped 971 * and continued on the next line. 
972 * 973 * @return {@code true} if illegal trailing spaces should be stripped off, or 974 * {@code false} if LDIF records containing illegal trailing spaces 975 * should be rejected. 976 * 977 * @deprecated Use the {@link #getTrailingSpaceBehavior} method instead. 978 */ 979 @Deprecated() 980 public boolean stripTrailingSpaces() 981 { 982 return (trailingSpaceBehavior == TrailingSpaceBehavior.STRIP); 983 } 984 985 986 987 /** 988 * Specifies whether to strip off any illegal trailing spaces that may appear 989 * in LDIF records (e.g., after an entry DN or attribute value). The LDIF 990 * specification strongly recommends that any value which legitimately 991 * contains trailing spaces be base64-encoded, and any spaces which appear 992 * after the end of non-base64-encoded values may therefore be considered 993 * invalid. If any such trailing spaces are encountered in an LDIF record and 994 * they are not to be stripped, then an {@link LDIFException} will be thrown 995 * for that record. 996 * <BR><BR> 997 * Note that this applies only to spaces after the end of a value, and not to 998 * spaces which may appear at the end of a line for a value that is wrapped 999 * and continued on the next line. 1000 * 1001 * @param stripTrailingSpaces Indicates whether to strip off any illegal 1002 * trailing spaces, or {@code false} if LDIF 1003 * records containing them should be rejected. 1004 * 1005 * @deprecated Use the {@link #setTrailingSpaceBehavior} method instead. 1006 */ 1007 @Deprecated() 1008 public void setStripTrailingSpaces(final boolean stripTrailingSpaces) 1009 { 1010 trailingSpaceBehavior = stripTrailingSpaces 1011 ? TrailingSpaceBehavior.STRIP 1012 : TrailingSpaceBehavior.REJECT; 1013 } 1014 1015 1016 1017 /** 1018 * Retrieves the behavior that should be exhibited when encountering attribute 1019 * values which are not base64-encoded but contain trailing spaces. 
The LDIF 1020 * specification strongly recommends that any value which legitimately 1021 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser 1022 * may be configured to automatically strip these spaces, to preserve them, or 1023 * to reject any entry or change record containing them. 1024 * 1025 * @return The behavior that should be exhibited when encountering attribute 1026 * values which are not base64-encoded but contain trailing spaces. 1027 */ 1028 public TrailingSpaceBehavior getTrailingSpaceBehavior() 1029 { 1030 return trailingSpaceBehavior; 1031 } 1032 1033 1034 1035 /** 1036 * Specifies the behavior that should be exhibited when encountering attribute 1037 * values which are not base64-encoded but contain trailing spaces. The LDIF 1038 * specification strongly recommends that any value which legitimately 1039 * contains trailing spaces be base64-encoded, but the LDAP SDK LDIF parser 1040 * may be configured to automatically strip these spaces, to preserve them, or 1041 * to reject any entry or change record containing them. 1042 * 1043 * @param trailingSpaceBehavior The behavior that should be exhibited when 1044 * encountering attribute values which are not 1045 * base64-encoded but contain trailing spaces. 1046 */ 1047 public void setTrailingSpaceBehavior( 1048 final TrailingSpaceBehavior trailingSpaceBehavior) 1049 { 1050 this.trailingSpaceBehavior = trailingSpaceBehavior; 1051 } 1052 1053 1054 1055 /** 1056 * Retrieves the base path that will be prepended to relative paths in order 1057 * to obtain an absolute path. This will only be used for "file:" URLs that 1058 * have paths which do not begin with a slash. 1059 * 1060 * @return The base path that will be prepended to relative paths in order to 1061 * obtain an absolute path. 
1062 */ 1063 public String getRelativeBasePath() 1064 { 1065 return relativeBasePath; 1066 } 1067 1068 1069 1070 /** 1071 * Specifies the base path that will be prepended to relative paths in order 1072 * to obtain an absolute path. This will only be used for "file:" URLs that 1073 * have paths which do not begin with a space. 1074 * 1075 * @param relativeBasePath The base path that will be prepended to relative 1076 * paths in order to obtain an absolute path. 1077 */ 1078 public void setRelativeBasePath(final String relativeBasePath) 1079 { 1080 setRelativeBasePath(new File(relativeBasePath)); 1081 } 1082 1083 1084 1085 /** 1086 * Specifies the base path that will be prepended to relative paths in order 1087 * to obtain an absolute path. This will only be used for "file:" URLs that 1088 * have paths which do not begin with a space. 1089 * 1090 * @param relativeBasePath The base path that will be prepended to relative 1091 * paths in order to obtain an absolute path. 1092 */ 1093 public void setRelativeBasePath(final File relativeBasePath) 1094 { 1095 final String path = relativeBasePath.getAbsolutePath(); 1096 if (path.endsWith(File.separator)) 1097 { 1098 this.relativeBasePath = path; 1099 } 1100 else 1101 { 1102 this.relativeBasePath = path + File.separator; 1103 } 1104 } 1105 1106 1107 1108 /** 1109 * Retrieves the schema that will be used when reading LDIF records, if 1110 * defined. 1111 * 1112 * @return The schema that will be used when reading LDIF records, or 1113 * {@code null} if no schema should be used and all attributes should 1114 * be treated as case-insensitive strings. 1115 */ 1116 public Schema getSchema() 1117 { 1118 return schema; 1119 } 1120 1121 1122 1123 /** 1124 * Specifies the schema that should be used when reading LDIF records. 1125 * 1126 * @param schema The schema that should be used when reading LDIF records, 1127 * or {@code null} if no schema should be used and all 1128 * attributes should be treated as case-insensitive strings. 
1129 */ 1130 public void setSchema(final Schema schema) 1131 { 1132 this.schema = schema; 1133 } 1134 1135 1136 1137 /** 1138 * Reads a record from the LDIF source. It may be either an entry or an LDIF 1139 * change record. 1140 * 1141 * @return The record read from the LDIF source, or {@code null} if there are 1142 * no more entries to be read. 1143 * 1144 * @throws IOException If a problem occurs while trying to read from the 1145 * LDIF source. 1146 * 1147 * @throws LDIFException If the data read could not be parsed as an entry or 1148 * an LDIF change record. 1149 */ 1150 public LDIFRecord readLDIFRecord() 1151 throws IOException, LDIFException 1152 { 1153 if (isAsync()) 1154 { 1155 return readLDIFRecordAsync(); 1156 } 1157 else 1158 { 1159 return readLDIFRecordInternal(); 1160 } 1161 } 1162 1163 1164 1165 /** 1166 * Reads an entry from the LDIF source. 1167 * 1168 * @return The entry read from the LDIF source, or {@code null} if there are 1169 * no more entries to be read. 1170 * 1171 * @throws IOException If a problem occurs while attempting to read from the 1172 * LDIF source. 1173 * 1174 * @throws LDIFException If the data read could not be parsed as an entry. 1175 */ 1176 public Entry readEntry() 1177 throws IOException, LDIFException 1178 { 1179 if (isAsync()) 1180 { 1181 return readEntryAsync(); 1182 } 1183 else 1184 { 1185 return readEntryInternal(); 1186 } 1187 } 1188 1189 1190 1191 /** 1192 * Reads an LDIF change record from the LDIF source. The LDIF record must 1193 * have a changetype. 1194 * 1195 * @return The change record read from the LDIF source, or {@code null} if 1196 * there are no more records to be read. 1197 * 1198 * @throws IOException If a problem occurs while attempting to read from the 1199 * LDIF source. 1200 * 1201 * @throws LDIFException If the data read could not be parsed as an LDIF 1202 * change record. 
1203 */ 1204 public LDIFChangeRecord readChangeRecord() 1205 throws IOException, LDIFException 1206 { 1207 return readChangeRecord(false); 1208 } 1209 1210 1211 1212 /** 1213 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF 1214 * record does not have a changetype, then it may be assumed to be an add 1215 * change record. 1216 * 1217 * @param defaultAdd Indicates whether an LDIF record not containing a 1218 * changetype should be retrieved as an add change record. 1219 * If this is {@code false} and the record read does not 1220 * include a changetype, then an {@link LDIFException} 1221 * will be thrown. 1222 * 1223 * @return The change record read from the LDIF source, or {@code null} if 1224 * there are no more records to be read. 1225 * 1226 * @throws IOException If a problem occurs while attempting to read from the 1227 * LDIF source. 1228 * 1229 * @throws LDIFException If the data read could not be parsed as an LDIF 1230 * change record. 1231 */ 1232 public LDIFChangeRecord readChangeRecord(final boolean defaultAdd) 1233 throws IOException, LDIFException 1234 { 1235 if (isAsync()) 1236 { 1237 return readChangeRecordAsync(defaultAdd); 1238 } 1239 else 1240 { 1241 return readChangeRecordInternal(defaultAdd); 1242 } 1243 } 1244 1245 1246 1247 /** 1248 * Reads the next {@code LDIFRecord}, which was read and parsed by a different 1249 * thread. 1250 * 1251 * @return The next parsed record or {@code null} if there are no more 1252 * records to read. 1253 * 1254 * @throws IOException If IOException was thrown when reading or parsing 1255 * the record. 1256 * 1257 * @throws LDIFException If LDIFException was thrown parsing the record. 
1258 */ 1259 private LDIFRecord readLDIFRecordAsync() 1260 throws IOException, LDIFException 1261 { 1262 final Result<UnparsedLDIFRecord, LDIFRecord> result = 1263 readLDIFRecordResultAsync(); 1264 if (result == null) 1265 { 1266 return null; 1267 } 1268 else 1269 { 1270 return result.getOutput(); 1271 } 1272 } 1273 1274 1275 1276 /** 1277 * Reads an entry asynchronously from the LDIF source. 1278 * 1279 * @return The entry read from the LDIF source, or {@code null} if there are 1280 * no more entries to be read. 1281 * 1282 * @throws IOException If a problem occurs while attempting to read from the 1283 * LDIF source. 1284 * @throws LDIFException If the data read could not be parsed as an entry. 1285 */ 1286 private Entry readEntryAsync() 1287 throws IOException, LDIFException 1288 { 1289 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1290 LDIFRecord record = null; 1291 while (record == null) 1292 { 1293 result = readLDIFRecordResultAsync(); 1294 if (result == null) 1295 { 1296 return null; 1297 } 1298 1299 record = result.getOutput(); 1300 1301 // This is a special value that means we should skip this Entry. We have 1302 // to use something different than null because null means EOF. 1303 if (record == SKIP_ENTRY) 1304 { 1305 record = null; 1306 } 1307 } 1308 1309 if (record instanceof Entry) 1310 { 1311 return (Entry) record; 1312 } 1313 else if (record instanceof LDIFChangeRecord) 1314 { 1315 try 1316 { 1317 // Some LDIFChangeRecord can be converted to an Entry. This is really 1318 // an edge case though. 
1319 return ((LDIFChangeRecord)record).toEntry(); 1320 } 1321 catch (LDIFException e) 1322 { 1323 debugException(e); 1324 final long firstLineNumber = result.getInput().getFirstLineNumber(); 1325 throw new LDIFException(e.getExceptionMessage(), 1326 firstLineNumber, true, e); 1327 } 1328 } 1329 1330 throw new AssertionError("LDIFRecords must either be an Entry or an " + 1331 "LDIFChangeRecord"); 1332 } 1333 1334 1335 1336 /** 1337 * Reads an LDIF change record from the LDIF source asynchronously. 1338 * Optionally, if the LDIF record does not have a changetype, then it may be 1339 * assumed to be an add change record. 1340 * 1341 * @param defaultAdd Indicates whether an LDIF record not containing a 1342 * changetype should be retrieved as an add change record. 1343 * If this is {@code false} and the record read does not 1344 * include a changetype, then an {@link LDIFException} will 1345 * be thrown. 1346 * 1347 * @return The change record read from the LDIF source, or {@code null} if 1348 * there are no more records to be read. 1349 * 1350 * @throws IOException If a problem occurs while attempting to read from the 1351 * LDIF source. 1352 * @throws LDIFException If the data read could not be parsed as an LDIF 1353 * change record. 1354 */ 1355 private LDIFChangeRecord readChangeRecordAsync(final boolean defaultAdd) 1356 throws IOException, LDIFException 1357 { 1358 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1359 LDIFRecord record = null; 1360 while (record == null) 1361 { 1362 result = readLDIFRecordResultAsync(); 1363 if (result == null) 1364 { 1365 return null; 1366 } 1367 1368 record = result.getOutput(); 1369 1370 // This is a special value that means we should skip this Entry. We have 1371 // to use something different than null because null means EOF. 
1372 if (record == SKIP_ENTRY) 1373 { 1374 record = null; 1375 } 1376 } 1377 1378 if (record instanceof LDIFChangeRecord) 1379 { 1380 return (LDIFChangeRecord) record; 1381 } 1382 else if (record instanceof Entry) 1383 { 1384 if (defaultAdd) 1385 { 1386 return new LDIFAddChangeRecord((Entry) record); 1387 } 1388 else 1389 { 1390 final long firstLineNumber = result.getInput().getFirstLineNumber(); 1391 throw new LDIFException( 1392 ERR_READ_NOT_CHANGE_RECORD.get(firstLineNumber), firstLineNumber, 1393 true); 1394 } 1395 } 1396 1397 throw new AssertionError("LDIFRecords must either be an Entry or an " + 1398 "LDIFChangeRecord"); 1399 } 1400 1401 1402 1403 /** 1404 * Reads the next LDIF record, which was read and parsed asynchronously by 1405 * separate threads. 1406 * 1407 * @return The next LDIF record or {@code null} if there are no more records. 1408 * 1409 * @throws IOException If a problem occurs while attempting to read from the 1410 * LDIF source. 1411 * 1412 * @throws LDIFException If the data read could not be parsed as an entry. 1413 */ 1414 private Result<UnparsedLDIFRecord, LDIFRecord> readLDIFRecordResultAsync() 1415 throws IOException, LDIFException 1416 { 1417 Result<UnparsedLDIFRecord, LDIFRecord> result = null; 1418 1419 // If the asynchronous reading and parsing is complete, then we don't have 1420 // to block waiting for the next record to show up on the queue. If there 1421 // isn't a record there, then return null (EOF) right away. 1422 if (asyncParsingComplete.get()) 1423 { 1424 result = asyncParsedRecords.poll(); 1425 } 1426 else 1427 { 1428 try 1429 { 1430 // We probably could just do a asyncParsedRecords.take() here, but 1431 // there are some edge case error scenarios where 1432 // asyncParsingComplete might be set without a special EOF sentinel 1433 // Result enqueued. So to guard against this, we have a very cautious 1434 // polling interval of 1 second. 
During normal processing, we never 1435 // have to wait for this to expire, when there is something to do 1436 // (like shutdown). 1437 while ((result == null) && (!asyncParsingComplete.get())) 1438 { 1439 result = asyncParsedRecords.poll(1, TimeUnit.SECONDS); 1440 } 1441 1442 // There's a very small chance that we missed the value, so double-check 1443 if (result == null) 1444 { 1445 result = asyncParsedRecords.poll(); 1446 } 1447 } 1448 catch (InterruptedException e) 1449 { 1450 debugException(e); 1451 throw createIOExceptionWithCause(null, e); 1452 } 1453 } 1454 if (result == null) 1455 { 1456 return null; 1457 } 1458 1459 rethrow(result.getFailureCause()); 1460 1461 // Check if we reached the end of the input 1462 final UnparsedLDIFRecord unparsedRecord = result.getInput(); 1463 if (unparsedRecord.isEOF()) 1464 { 1465 // This might have been set already by the LineReaderThread, but 1466 // just in case it hasn't gotten to it yet, do so here. 1467 asyncParsingComplete.set(true); 1468 1469 // Enqueue this EOF result again for any other thread that might be 1470 // blocked in asyncParsedRecords.take() even though having multiple 1471 // threads call this method concurrently breaks the contract of this 1472 // class. 1473 try 1474 { 1475 asyncParsedRecords.put(result); 1476 } 1477 catch (InterruptedException e) 1478 { 1479 // We shouldn't ever get interrupted because the put won't ever block. 1480 // Once we are done reading, this is the only item left in the queue, 1481 // so we should always be able to re-enqueue it. 1482 debugException(e); 1483 } 1484 return null; 1485 } 1486 1487 return result; 1488 } 1489 1490 1491 1492 /** 1493 * Indicates whether this LDIF reader was constructed to perform asynchronous 1494 * processing. 1495 * 1496 * @return {@code true} if this LDIFReader was constructed to perform 1497 * asynchronous processing, or {@code false} if not. 
1498 */ 1499 private boolean isAsync() 1500 { 1501 return isAsync; 1502 } 1503 1504 1505 1506 /** 1507 * If not {@code null}, rethrows the specified Throwable as either an 1508 * IOException or LDIFException. 1509 * 1510 * @param t The exception to rethrow. If it's {@code null}, then nothing 1511 * is thrown. 1512 * 1513 * @throws IOException If t is an IOException or a checked Exception that 1514 * is not an LDIFException. 1515 * @throws LDIFException If t is an LDIFException. 1516 */ 1517 static void rethrow(final Throwable t) 1518 throws IOException, LDIFException 1519 { 1520 if (t == null) 1521 { 1522 return; 1523 } 1524 1525 if (t instanceof IOException) 1526 { 1527 throw (IOException) t; 1528 } 1529 else if (t instanceof LDIFException) 1530 { 1531 throw (LDIFException) t; 1532 } 1533 else if (t instanceof RuntimeException) 1534 { 1535 throw (RuntimeException) t; 1536 } 1537 else if (t instanceof Error) 1538 { 1539 throw (Error) t; 1540 } 1541 else 1542 { 1543 throw createIOExceptionWithCause(null, t); 1544 } 1545 } 1546 1547 1548 1549 /** 1550 * Reads a record from the LDIF source. It may be either an entry or an LDIF 1551 * change record. 1552 * 1553 * @return The record read from the LDIF source, or {@code null} if there are 1554 * no more entries to be read. 1555 * 1556 * @throws IOException If a problem occurs while trying to read from the 1557 * LDIF source. 1558 * @throws LDIFException If the data read could not be parsed as an entry or 1559 * an LDIF change record. 1560 */ 1561 private LDIFRecord readLDIFRecordInternal() 1562 throws IOException, LDIFException 1563 { 1564 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1565 return decodeRecord(unparsedRecord, relativeBasePath, schema); 1566 } 1567 1568 1569 1570 /** 1571 * Reads an entry from the LDIF source. 1572 * 1573 * @return The entry read from the LDIF source, or {@code null} if there are 1574 * no more entries to be read. 
1575 * 1576 * @throws IOException If a problem occurs while attempting to read from the 1577 * LDIF source. 1578 * @throws LDIFException If the data read could not be parsed as an entry. 1579 */ 1580 private Entry readEntryInternal() 1581 throws IOException, LDIFException 1582 { 1583 Entry e = null; 1584 while (e == null) 1585 { 1586 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1587 if (unparsedRecord.isEOF()) 1588 { 1589 return null; 1590 } 1591 1592 e = decodeEntry(unparsedRecord, relativeBasePath); 1593 debugLDIFRead(e); 1594 1595 if (entryTranslator != null) 1596 { 1597 e = entryTranslator.translate(e, unparsedRecord.getFirstLineNumber()); 1598 } 1599 } 1600 return e; 1601 } 1602 1603 1604 1605 /** 1606 * Reads an LDIF change record from the LDIF source. Optionally, if the LDIF 1607 * record does not have a changetype, then it may be assumed to be an add 1608 * change record. 1609 * 1610 * @param defaultAdd Indicates whether an LDIF record not containing a 1611 * changetype should be retrieved as an add change record. 1612 * If this is {@code false} and the record read does not 1613 * include a changetype, then an {@link LDIFException} will 1614 * be thrown. 1615 * 1616 * @return The change record read from the LDIF source, or {@code null} if 1617 * there are no more records to be read. 1618 * 1619 * @throws IOException If a problem occurs while attempting to read from the 1620 * LDIF source. 1621 * @throws LDIFException If the data read could not be parsed as an LDIF 1622 * change record. 
1623 */ 1624 private LDIFChangeRecord readChangeRecordInternal(final boolean defaultAdd) 1625 throws IOException, LDIFException 1626 { 1627 LDIFChangeRecord r = null; 1628 while (r == null) 1629 { 1630 final UnparsedLDIFRecord unparsedRecord = readUnparsedRecord(); 1631 if (unparsedRecord.isEOF()) 1632 { 1633 return null; 1634 } 1635 1636 r = decodeChangeRecord(unparsedRecord, relativeBasePath, defaultAdd, 1637 schema); 1638 debugLDIFRead(r); 1639 1640 if (changeRecordTranslator != null) 1641 { 1642 r = changeRecordTranslator.translate(r, 1643 unparsedRecord.getFirstLineNumber()); 1644 } 1645 } 1646 return r; 1647 } 1648 1649 1650 1651 /** 1652 * Reads a record (either an entry or a change record) from the LDIF source 1653 * and places it in the line list. 1654 * 1655 * @return The line number for the first line of the entry that was read. 1656 * 1657 * @throws IOException If a problem occurs while attempting to read from the 1658 * LDIF source. 1659 * 1660 * @throws LDIFException If the data read could not be parsed as a valid 1661 * LDIF record. 1662 */ 1663 private UnparsedLDIFRecord readUnparsedRecord() 1664 throws IOException, LDIFException 1665 { 1666 final ArrayList<StringBuilder> lineList = new ArrayList<StringBuilder>(20); 1667 boolean lastWasComment = false; 1668 long firstLineNumber = lineNumberCounter + 1; 1669 while (true) 1670 { 1671 final String line = reader.readLine(); 1672 lineNumberCounter++; 1673 1674 if (line == null) 1675 { 1676 // We've hit the end of the LDIF source. If we haven't read any entry 1677 // data, then return null. Otherwise, the last entry wasn't followed by 1678 // a blank line, which is OK, and we should decode that entry. 1679 if (lineList.isEmpty()) 1680 { 1681 return new UnparsedLDIFRecord(new ArrayList<StringBuilder>(0), 1682 duplicateValueBehavior, trailingSpaceBehavior, schema, -1); 1683 } 1684 else 1685 { 1686 break; 1687 } 1688 } 1689 1690 if (line.length() == 0) 1691 { 1692 // It's a blank line. 
If we have read entry data, then this signals the 1693 // end of the entry. Otherwise, it's an extra space between entries, 1694 // which is OK. 1695 lastWasComment = false; 1696 if (lineList.isEmpty()) 1697 { 1698 firstLineNumber++; 1699 continue; 1700 } 1701 else 1702 { 1703 break; 1704 } 1705 } 1706 1707 if (line.charAt(0) == ' ') 1708 { 1709 // The line starts with a space, which means that it must be a 1710 // continuation of the previous line. This is true even if the last 1711 // line was a comment. 1712 if (lastWasComment) 1713 { 1714 // What we've read is part of a comment, so we don't care about its 1715 // content. 1716 } 1717 else if (lineList.isEmpty()) 1718 { 1719 throw new LDIFException( 1720 ERR_READ_UNEXPECTED_FIRST_SPACE.get(lineNumberCounter), 1721 lineNumberCounter, false); 1722 } 1723 else 1724 { 1725 lineList.get(lineList.size() - 1).append(line.substring(1)); 1726 lastWasComment = false; 1727 } 1728 } 1729 else if (line.charAt(0) == '#') 1730 { 1731 lastWasComment = true; 1732 } 1733 else 1734 { 1735 // We want to make sure that we skip over the "version:" line if it 1736 // exists, but that should only occur at the beginning of an entry where 1737 // it can't be confused with a possible "version" attribute. 1738 if (lineList.isEmpty() && line.startsWith("version:")) 1739 { 1740 lastWasComment = true; 1741 } 1742 else 1743 { 1744 lineList.add(new StringBuilder(line)); 1745 lastWasComment = false; 1746 } 1747 } 1748 } 1749 1750 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 1751 trailingSpaceBehavior, schema, firstLineNumber); 1752 } 1753 1754 1755 1756 /** 1757 * Decodes the provided set of LDIF lines as an entry. The provided set of 1758 * lines must contain exactly one entry. Long lines may be wrapped as per the 1759 * LDIF specification, and it is acceptable to have one or more blank lines 1760 * following the entry. A default trailing space behavior of 1761 * {@link TrailingSpaceBehavior#REJECT} will be used. 
1762 * 1763 * @param ldifLines The set of lines that comprise the LDIF representation 1764 * of the entry. It must not be {@code null} or empty. 1765 * 1766 * @return The entry read from LDIF. 1767 * 1768 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1769 * entry. 1770 */ 1771 public static Entry decodeEntry(final String... ldifLines) 1772 throws LDIFException 1773 { 1774 final Entry e = decodeEntry(prepareRecord(DuplicateValueBehavior.STRIP, 1775 TrailingSpaceBehavior.REJECT, null, ldifLines), 1776 DEFAULT_RELATIVE_BASE_PATH); 1777 debugLDIFRead(e); 1778 return e; 1779 } 1780 1781 1782 1783 /** 1784 * Decodes the provided set of LDIF lines as an entry. The provided set of 1785 * lines must contain exactly one entry. Long lines may be wrapped as per the 1786 * LDIF specification, and it is acceptable to have one or more blank lines 1787 * following the entry. A default trailing space behavior of 1788 * {@link TrailingSpaceBehavior#REJECT} will be used. 1789 * 1790 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1791 * attribute values encountered while parsing. 1792 * @param schema The schema to use when parsing the record, 1793 * if applicable. 1794 * @param ldifLines The set of lines that comprise the LDIF 1795 * representation of the entry. It must not be 1796 * {@code null} or empty. 1797 * 1798 * @return The entry read from LDIF. 1799 * 1800 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1801 * entry. 1802 */ 1803 public static Entry decodeEntry(final boolean ignoreDuplicateValues, 1804 final Schema schema, 1805 final String... ldifLines) 1806 throws LDIFException 1807 { 1808 return decodeEntry(ignoreDuplicateValues, TrailingSpaceBehavior.REJECT, 1809 schema, ldifLines); 1810 } 1811 1812 1813 1814 /** 1815 * Decodes the provided set of LDIF lines as an entry. The provided set of 1816 * lines must contain exactly one entry. 
Long lines may be wrapped as per the 1817 * LDIF specification, and it is acceptable to have one or more blank lines 1818 * following the entry. 1819 * 1820 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1821 * attribute values encountered while parsing. 1822 * @param trailingSpaceBehavior The behavior that should be exhibited when 1823 * encountering attribute values which are not 1824 * base64-encoded but contain trailing spaces. 1825 * It must not be {@code null}. 1826 * @param schema The schema to use when parsing the record, 1827 * if applicable. 1828 * @param ldifLines The set of lines that comprise the LDIF 1829 * representation of the entry. It must not be 1830 * {@code null} or empty. 1831 * 1832 * @return The entry read from LDIF. 1833 * 1834 * @throws LDIFException If the provided LDIF data cannot be decoded as an 1835 * entry. 1836 */ 1837 public static Entry decodeEntry( 1838 final boolean ignoreDuplicateValues, 1839 final TrailingSpaceBehavior trailingSpaceBehavior, 1840 final Schema schema, 1841 final String... ldifLines) throws LDIFException 1842 { 1843 final Entry e = decodeEntry(prepareRecord( 1844 (ignoreDuplicateValues 1845 ? DuplicateValueBehavior.STRIP 1846 : DuplicateValueBehavior.REJECT), 1847 trailingSpaceBehavior, schema, ldifLines), 1848 DEFAULT_RELATIVE_BASE_PATH); 1849 debugLDIFRead(e); 1850 return e; 1851 } 1852 1853 1854 1855 /** 1856 * Decodes the provided set of LDIF lines as an LDIF change record. The 1857 * provided set of lines must contain exactly one change record and it must 1858 * include a changetype. Long lines may be wrapped as per the LDIF 1859 * specification, and it is acceptable to have one or more blank lines 1860 * following the entry. 1861 * 1862 * @param ldifLines The set of lines that comprise the LDIF representation 1863 * of the change record. It must not be {@code null} or 1864 * empty. 1865 * 1866 * @return The change record read from LDIF. 
1867 * 1868 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1869 * change record. 1870 */ 1871 public static LDIFChangeRecord decodeChangeRecord(final String... ldifLines) 1872 throws LDIFException 1873 { 1874 return decodeChangeRecord(false, ldifLines); 1875 } 1876 1877 1878 1879 /** 1880 * Decodes the provided set of LDIF lines as an LDIF change record. The 1881 * provided set of lines must contain exactly one change record. Long lines 1882 * may be wrapped as per the LDIF specification, and it is acceptable to have 1883 * one or more blank lines following the entry. 1884 * 1885 * @param defaultAdd Indicates whether an LDIF record not containing a 1886 * changetype should be retrieved as an add change record. 1887 * If this is {@code false} and the record read does not 1888 * include a changetype, then an {@link LDIFException} 1889 * will be thrown. 1890 * @param ldifLines The set of lines that comprise the LDIF representation 1891 * of the change record. It must not be {@code null} or 1892 * empty. 1893 * 1894 * @return The change record read from LDIF. 1895 * 1896 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1897 * change record. 1898 */ 1899 public static LDIFChangeRecord decodeChangeRecord(final boolean defaultAdd, 1900 final String... ldifLines) 1901 throws LDIFException 1902 { 1903 final LDIFChangeRecord r = 1904 decodeChangeRecord( 1905 prepareRecord(DuplicateValueBehavior.STRIP, 1906 TrailingSpaceBehavior.REJECT, null, ldifLines), 1907 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null); 1908 debugLDIFRead(r); 1909 return r; 1910 } 1911 1912 1913 1914 /** 1915 * Decodes the provided set of LDIF lines as an LDIF change record. The 1916 * provided set of lines must contain exactly one change record. Long lines 1917 * may be wrapped as per the LDIF specification, and it is acceptable to have 1918 * one or more blank lines following the entry. 
1919 * 1920 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1921 * attribute values encountered while parsing. 1922 * @param schema The schema to use when processing the change 1923 * record, or {@code null} if no schema should 1924 * be used and all values should be treated as 1925 * case-insensitive strings. 1926 * @param defaultAdd Indicates whether an LDIF record not 1927 * containing a changetype should be retrieved 1928 * as an add change record. If this is 1929 * {@code false} and the record read does not 1930 * include a changetype, then an 1931 * {@link LDIFException} will be thrown. 1932 * @param ldifLines The set of lines that comprise the LDIF 1933 * representation of the change record. It 1934 * must not be {@code null} or empty. 1935 * 1936 * @return The change record read from LDIF. 1937 * 1938 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1939 * change record. 1940 */ 1941 public static LDIFChangeRecord decodeChangeRecord( 1942 final boolean ignoreDuplicateValues, 1943 final Schema schema, 1944 final boolean defaultAdd, 1945 final String... ldifLines) 1946 throws LDIFException 1947 { 1948 return decodeChangeRecord(ignoreDuplicateValues, 1949 TrailingSpaceBehavior.REJECT, schema, defaultAdd, ldifLines); 1950 } 1951 1952 1953 1954 /** 1955 * Decodes the provided set of LDIF lines as an LDIF change record. The 1956 * provided set of lines must contain exactly one change record. Long lines 1957 * may be wrapped as per the LDIF specification, and it is acceptable to have 1958 * one or more blank lines following the entry. 1959 * 1960 * @param ignoreDuplicateValues Indicates whether to ignore duplicate 1961 * attribute values encountered while parsing. 1962 * @param trailingSpaceBehavior The behavior that should be exhibited when 1963 * encountering attribute values which are not 1964 * base64-encoded but contain trailing spaces. 1965 * It must not be {@code null}. 
1966 * @param schema The schema to use when processing the change 1967 * record, or {@code null} if no schema should 1968 * be used and all values should be treated as 1969 * case-insensitive strings. 1970 * @param defaultAdd Indicates whether an LDIF record not 1971 * containing a changetype should be retrieved 1972 * as an add change record. If this is 1973 * {@code false} and the record read does not 1974 * include a changetype, then an 1975 * {@link LDIFException} will be thrown. 1976 * @param ldifLines The set of lines that comprise the LDIF 1977 * representation of the change record. It 1978 * must not be {@code null} or empty. 1979 * 1980 * @return The change record read from LDIF. 1981 * 1982 * @throws LDIFException If the provided LDIF data cannot be decoded as a 1983 * change record. 1984 */ 1985 public static LDIFChangeRecord decodeChangeRecord( 1986 final boolean ignoreDuplicateValues, 1987 final TrailingSpaceBehavior trailingSpaceBehavior, 1988 final Schema schema, 1989 final boolean defaultAdd, 1990 final String... ldifLines) 1991 throws LDIFException 1992 { 1993 final LDIFChangeRecord r = decodeChangeRecord( 1994 prepareRecord( 1995 (ignoreDuplicateValues 1996 ? DuplicateValueBehavior.STRIP 1997 : DuplicateValueBehavior.REJECT), 1998 trailingSpaceBehavior, schema, ldifLines), 1999 DEFAULT_RELATIVE_BASE_PATH, defaultAdd, null); 2000 debugLDIFRead(r); 2001 return r; 2002 } 2003 2004 2005 2006 /** 2007 * Parses the provided set of lines into a list of {@code StringBuilder} 2008 * objects suitable for decoding into an entry or LDIF change record. 2009 * Comments will be ignored and wrapped lines will be unwrapped. 2010 * 2011 * @param duplicateValueBehavior The behavior that should be exhibited if 2012 * the LDIF reader encounters an entry with 2013 * duplicate values. 2014 * @param trailingSpaceBehavior The behavior that should be exhibited when 2015 * encountering attribute values which are not 2016 * base64-encoded but contain trailing spaces. 
2017 * @param schema The schema to use when parsing the record, 2018 * if applicable. 2019 * @param ldifLines The set of lines that comprise the record 2020 * to decode. It must not be {@code null} or 2021 * empty. 2022 * 2023 * @return The prepared list of {@code StringBuilder} objects ready to be 2024 * decoded. 2025 * 2026 * @throws LDIFException If the provided lines do not contain valid LDIF 2027 * content. 2028 */ 2029 private static UnparsedLDIFRecord prepareRecord( 2030 final DuplicateValueBehavior duplicateValueBehavior, 2031 final TrailingSpaceBehavior trailingSpaceBehavior, 2032 final Schema schema, final String... ldifLines) 2033 throws LDIFException 2034 { 2035 ensureNotNull(ldifLines); 2036 ensureFalse(ldifLines.length == 0, 2037 "LDIFReader.prepareRecord.ldifLines must not be empty."); 2038 2039 boolean lastWasComment = false; 2040 final ArrayList<StringBuilder> lineList = 2041 new ArrayList<StringBuilder>(ldifLines.length); 2042 for (int i=0; i < ldifLines.length; i++) 2043 { 2044 final String line = ldifLines[i]; 2045 if (line.length() == 0) 2046 { 2047 // This is only acceptable if there are no more non-empty lines in the 2048 // array. 2049 for (int j=i+1; j < ldifLines.length; j++) 2050 { 2051 if (ldifLines[j].length() > 0) 2052 { 2053 throw new LDIFException(ERR_READ_UNEXPECTED_BLANK.get(i), i, true, 2054 ldifLines, null); 2055 } 2056 2057 // If we've gotten here, then we know that we're at the end of the 2058 // entry. If we have read data, then we can decode it as an entry. 2059 // Otherwise, there was no real data in the provided LDIF lines. 2060 if (lineList.isEmpty()) 2061 { 2062 throw new LDIFException(ERR_READ_ONLY_BLANKS.get(), 0, true, 2063 ldifLines, null); 2064 } 2065 else 2066 { 2067 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 2068 trailingSpaceBehavior, schema, 0); 2069 } 2070 } 2071 } 2072 2073 if (line.charAt(0) == ' ') 2074 { 2075 if (i > 0) 2076 { 2077 if (! 
lastWasComment) 2078 { 2079 lineList.get(lineList.size() - 1).append(line.substring(1)); 2080 } 2081 } 2082 else 2083 { 2084 throw new LDIFException( 2085 ERR_READ_UNEXPECTED_FIRST_SPACE_NO_NUMBER.get(), 0, 2086 true, ldifLines, null); 2087 } 2088 } 2089 else if (line.charAt(0) == '#') 2090 { 2091 lastWasComment = true; 2092 } 2093 else 2094 { 2095 lineList.add(new StringBuilder(line)); 2096 lastWasComment = false; 2097 } 2098 } 2099 2100 if (lineList.isEmpty()) 2101 { 2102 throw new LDIFException(ERR_READ_NO_DATA.get(), 0, true, ldifLines, null); 2103 } 2104 else 2105 { 2106 return new UnparsedLDIFRecord(lineList, duplicateValueBehavior, 2107 trailingSpaceBehavior, schema, 0); 2108 } 2109 } 2110 2111 2112 2113 /** 2114 * Decodes the unparsed record that was read from the LDIF source. It may be 2115 * either an entry or an LDIF change record. 2116 * 2117 * @param unparsedRecord The unparsed LDIF record that was read from the 2118 * input. It must not be {@code null} or empty. 2119 * @param relativeBasePath The base path that will be prepended to relative 2120 * paths in order to obtain an absolute path. 2121 * @param schema The schema to use when parsing. 2122 * 2123 * @return The parsed record, or {@code null} if there are no more entries to 2124 * be read. 2125 * 2126 * @throws LDIFException If the data read could not be parsed as an entry or 2127 * an LDIF change record. 2128 */ 2129 private static LDIFRecord decodeRecord( 2130 final UnparsedLDIFRecord unparsedRecord, 2131 final String relativeBasePath, 2132 final Schema schema) 2133 throws LDIFException 2134 { 2135 // If there was an error reading from the input, then we rethrow it here. 2136 final Exception readError = unparsedRecord.getFailureCause(); 2137 if (readError != null) 2138 { 2139 if (readError instanceof LDIFException) 2140 { 2141 // If the error was an LDIFException, which will normally be the case, 2142 // then rethrow it with all of the same state. 
We could just 2143 // throw (LDIFException) readError; 2144 // but that's considered bad form. 2145 final LDIFException ldifEx = (LDIFException) readError; 2146 throw new LDIFException(ldifEx.getMessage(), 2147 ldifEx.getLineNumber(), 2148 ldifEx.mayContinueReading(), 2149 ldifEx.getDataLines(), 2150 ldifEx.getCause()); 2151 } 2152 else 2153 { 2154 throw new LDIFException(getExceptionMessage(readError), 2155 -1, true, readError); 2156 } 2157 } 2158 2159 if (unparsedRecord.isEOF()) 2160 { 2161 return null; 2162 } 2163 2164 final ArrayList<StringBuilder> lineList = unparsedRecord.getLineList(); 2165 if (unparsedRecord.getLineList() == null) 2166 { 2167 return null; // We can get here if there was an error reading the lines. 2168 } 2169 2170 final LDIFRecord r; 2171 if (lineList.size() == 1) 2172 { 2173 r = decodeEntry(unparsedRecord, relativeBasePath); 2174 } 2175 else 2176 { 2177 final String lowerSecondLine = toLowerCase(lineList.get(1).toString()); 2178 if (lowerSecondLine.startsWith("control:") || 2179 lowerSecondLine.startsWith("changetype:")) 2180 { 2181 r = decodeChangeRecord(unparsedRecord, relativeBasePath, true, schema); 2182 } 2183 else 2184 { 2185 r = decodeEntry(unparsedRecord, relativeBasePath); 2186 } 2187 } 2188 2189 debugLDIFRead(r); 2190 return r; 2191 } 2192 2193 2194 2195 /** 2196 * Decodes the provided set of LDIF lines as an entry. The provided list must 2197 * not contain any blank lines or comments, and lines are not allowed to be 2198 * wrapped. 2199 * 2200 * @param unparsedRecord The unparsed LDIF record that was read from the 2201 * input. It must not be {@code null} or empty. 2202 * @param relativeBasePath The base path that will be prepended to relative 2203 * paths in order to obtain an absolute path. 2204 * 2205 * @return The entry read from LDIF. 2206 * 2207 * @throws LDIFException If the provided LDIF data cannot be read as an 2208 * entry. 
2209 */ 2210 private static Entry decodeEntry(final UnparsedLDIFRecord unparsedRecord, 2211 final String relativeBasePath) 2212 throws LDIFException 2213 { 2214 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList(); 2215 final long firstLineNumber = unparsedRecord.getFirstLineNumber(); 2216 2217 final Iterator<StringBuilder> iterator = ldifLines.iterator(); 2218 2219 // The first line must start with either "version:" or "dn:". If the first 2220 // line starts with "version:" then the second must start with "dn:". 2221 StringBuilder line = iterator.next(); 2222 handleTrailingSpaces(line, null, firstLineNumber, 2223 unparsedRecord.getTrailingSpaceBehavior()); 2224 int colonPos = line.indexOf(":"); 2225 if ((colonPos > 0) && 2226 line.substring(0, colonPos).equalsIgnoreCase("version")) 2227 { 2228 // The first line is "version:". Under most conditions, this will be 2229 // handled by the LDIF reader, but this can happen if you call 2230 // decodeEntry with a set of data that includes a version. At any rate, 2231 // read the next line, which must specify the DN. 2232 line = iterator.next(); 2233 handleTrailingSpaces(line, null, firstLineNumber, 2234 unparsedRecord.getTrailingSpaceBehavior()); 2235 } 2236 2237 colonPos = line.indexOf(":"); 2238 if ((colonPos < 0) || 2239 (! line.substring(0, colonPos).equalsIgnoreCase("dn"))) 2240 { 2241 throw new LDIFException( 2242 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber), 2243 firstLineNumber, true, ldifLines, null); 2244 } 2245 2246 final String dn; 2247 final int length = line.length(); 2248 if (length == (colonPos+1)) 2249 { 2250 // The colon was the last character on the line. This is acceptable and 2251 // indicates that the entry has the null DN. 2252 dn = ""; 2253 } 2254 else if (line.charAt(colonPos+1) == ':') 2255 { 2256 // Skip over any spaces leading up to the value, and then the rest of the 2257 // string is the base64-encoded DN. 
2258 int pos = colonPos+2; 2259 while ((pos < length) && (line.charAt(pos) == ' ')) 2260 { 2261 pos++; 2262 } 2263 2264 try 2265 { 2266 final byte[] dnBytes = Base64.decode(line.substring(pos)); 2267 dn = new String(dnBytes, "UTF-8"); 2268 } 2269 catch (final ParseException pe) 2270 { 2271 debugException(pe); 2272 throw new LDIFException( 2273 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2274 pe.getMessage()), 2275 firstLineNumber, true, ldifLines, pe); 2276 } 2277 catch (final Exception e) 2278 { 2279 debugException(e); 2280 throw new LDIFException( 2281 ERR_READ_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, e), 2282 firstLineNumber, true, ldifLines, e); 2283 } 2284 } 2285 else 2286 { 2287 // Skip over any spaces leading up to the value, and then the rest of the 2288 // string is the DN. 2289 int pos = colonPos+1; 2290 while ((pos < length) && (line.charAt(pos) == ' ')) 2291 { 2292 pos++; 2293 } 2294 2295 dn = line.substring(pos); 2296 } 2297 2298 2299 // The remaining lines must be the attributes for the entry. However, we 2300 // will allow the case in which an entry does not have any attributes, to be 2301 // able to support reading search result entries in which no attributes were 2302 // returned. 2303 if (! iterator.hasNext()) 2304 { 2305 return new Entry(dn, unparsedRecord.getSchema()); 2306 } 2307 2308 return new Entry(dn, unparsedRecord.getSchema(), 2309 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(), 2310 unparsedRecord.getTrailingSpaceBehavior(), 2311 unparsedRecord.getSchema(), ldifLines, iterator, relativeBasePath, 2312 firstLineNumber)); 2313 } 2314 2315 2316 2317 /** 2318 * Decodes the provided set of LDIF lines as a change record. The provided 2319 * list must not contain any blank lines or comments, and lines are not 2320 * allowed to be wrapped. 2321 * 2322 * @param unparsedRecord The unparsed LDIF record that was read from the 2323 * input. It must not be {@code null} or empty. 
2324 * @param relativeBasePath The base path that will be prepended to relative 2325 * paths in order to obtain an absolute path. 2326 * @param defaultAdd Indicates whether an LDIF record not containing a 2327 * changetype should be retrieved as an add change 2328 * record. If this is {@code false} and the record 2329 * read does not include a changetype, then an 2330 * {@link LDIFException} will be thrown. 2331 * @param schema The schema to use in parsing. 2332 * 2333 * @return The change record read from LDIF. 2334 * 2335 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2336 * change record. 2337 */ 2338 private static LDIFChangeRecord decodeChangeRecord( 2339 final UnparsedLDIFRecord unparsedRecord, 2340 final String relativeBasePath, 2341 final boolean defaultAdd, 2342 final Schema schema) 2343 throws LDIFException 2344 { 2345 final ArrayList<StringBuilder> ldifLines = unparsedRecord.getLineList(); 2346 final long firstLineNumber = unparsedRecord.getFirstLineNumber(); 2347 2348 Iterator<StringBuilder> iterator = ldifLines.iterator(); 2349 2350 // The first line must start with either "version:" or "dn:". If the first 2351 // line starts with "version:" then the second must start with "dn:". 2352 StringBuilder line = iterator.next(); 2353 handleTrailingSpaces(line, null, firstLineNumber, 2354 unparsedRecord.getTrailingSpaceBehavior()); 2355 int colonPos = line.indexOf(":"); 2356 int linesRead = 1; 2357 if ((colonPos > 0) && 2358 line.substring(0, colonPos).equalsIgnoreCase("version")) 2359 { 2360 // The first line is "version:". Under most conditions, this will be 2361 // handled by the LDIF reader, but this can happen if you call 2362 // decodeEntry with a set of data that includes a version. At any rate, 2363 // read the next line, which must specify the DN. 
2364 line = iterator.next(); 2365 linesRead++; 2366 handleTrailingSpaces(line, null, firstLineNumber, 2367 unparsedRecord.getTrailingSpaceBehavior()); 2368 } 2369 2370 colonPos = line.indexOf(":"); 2371 if ((colonPos < 0) || 2372 (! line.substring(0, colonPos).equalsIgnoreCase("dn"))) 2373 { 2374 throw new LDIFException( 2375 ERR_READ_DN_LINE_DOESNT_START_WITH_DN.get(firstLineNumber), 2376 firstLineNumber, true, ldifLines, null); 2377 } 2378 2379 final String dn; 2380 int length = line.length(); 2381 if (length == (colonPos+1)) 2382 { 2383 // The colon was the last character on the line. This is acceptable and 2384 // indicates that the entry has the null DN. 2385 dn = ""; 2386 } 2387 else if (line.charAt(colonPos+1) == ':') 2388 { 2389 // Skip over any spaces leading up to the value, and then the rest of the 2390 // string is the base64-encoded DN. 2391 int pos = colonPos+2; 2392 while ((pos < length) && (line.charAt(pos) == ' ')) 2393 { 2394 pos++; 2395 } 2396 2397 try 2398 { 2399 final byte[] dnBytes = Base64.decode(line.substring(pos)); 2400 dn = new String(dnBytes, "UTF-8"); 2401 } 2402 catch (final ParseException pe) 2403 { 2404 debugException(pe); 2405 throw new LDIFException( 2406 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2407 pe.getMessage()), 2408 firstLineNumber, true, ldifLines, pe); 2409 } 2410 catch (final Exception e) 2411 { 2412 debugException(e); 2413 throw new LDIFException( 2414 ERR_READ_CR_CANNOT_BASE64_DECODE_DN.get(firstLineNumber, 2415 e), 2416 firstLineNumber, true, ldifLines, e); 2417 } 2418 } 2419 else 2420 { 2421 // Skip over any spaces leading up to the value, and then the rest of the 2422 // string is the DN. 2423 int pos = colonPos+1; 2424 while ((pos < length) && (line.charAt(pos) == ' ')) 2425 { 2426 pos++; 2427 } 2428 2429 dn = line.substring(pos); 2430 } 2431 2432 2433 // An LDIF change record may contain zero or more controls, with the end of 2434 // the controls signified by the changetype. 
The changetype element must be 2435 // present, unless defaultAdd is true in which case the first thing that is 2436 // neither control or changetype will trigger the start of add attribute 2437 // parsing. 2438 if (! iterator.hasNext()) 2439 { 2440 throw new LDIFException(ERR_READ_CR_TOO_SHORT.get(firstLineNumber), 2441 firstLineNumber, true, ldifLines, null); 2442 } 2443 2444 String changeType = null; 2445 ArrayList<Control> controls = null; 2446 while (true) 2447 { 2448 line = iterator.next(); 2449 handleTrailingSpaces(line, dn, firstLineNumber, 2450 unparsedRecord.getTrailingSpaceBehavior()); 2451 colonPos = line.indexOf(":"); 2452 if (colonPos < 0) 2453 { 2454 throw new LDIFException( 2455 ERR_READ_CR_SECOND_LINE_MISSING_COLON.get(firstLineNumber), 2456 firstLineNumber, true, ldifLines, null); 2457 } 2458 2459 final String token = toLowerCase(line.substring(0, colonPos)); 2460 if (token.equals("control")) 2461 { 2462 if (controls == null) 2463 { 2464 controls = new ArrayList<Control>(5); 2465 } 2466 2467 controls.add(decodeControl(line, colonPos, firstLineNumber, ldifLines, 2468 relativeBasePath)); 2469 } 2470 else if (token.equals("changetype")) 2471 { 2472 changeType = 2473 decodeChangeType(line, colonPos, firstLineNumber, ldifLines); 2474 break; 2475 } 2476 else if (defaultAdd) 2477 { 2478 // The line we read wasn't a control or changetype declaration, so we'll 2479 // assume it's an attribute in an add record. However, we're not ready 2480 // for that yet, and since we can't rewind an iterator we'll create a 2481 // new one that hasn't yet gotten to this line. 
2482 changeType = "add"; 2483 iterator = ldifLines.iterator(); 2484 for (int i=0; i < linesRead; i++) 2485 { 2486 iterator.next(); 2487 } 2488 break; 2489 } 2490 else 2491 { 2492 throw new LDIFException( 2493 ERR_READ_CR_CT_LINE_DOESNT_START_WITH_CONTROL_OR_CT.get( 2494 firstLineNumber), 2495 firstLineNumber, true, ldifLines, null); 2496 } 2497 2498 linesRead++; 2499 } 2500 2501 2502 // Make sure that the change type is acceptable and then decode the rest of 2503 // the change record accordingly. 2504 final String lowerChangeType = toLowerCase(changeType); 2505 if (lowerChangeType.equals("add")) 2506 { 2507 // There must be at least one more line. If not, then that's an error. 2508 // Otherwise, parse the rest of the data as attribute-value pairs. 2509 if (iterator.hasNext()) 2510 { 2511 final Collection<Attribute> attrs = 2512 parseAttributes(dn, unparsedRecord.getDuplicateValueBehavior(), 2513 unparsedRecord.getTrailingSpaceBehavior(), 2514 unparsedRecord.getSchema(), ldifLines, iterator, 2515 relativeBasePath, firstLineNumber); 2516 final Attribute[] attributes = new Attribute[attrs.size()]; 2517 final Iterator<Attribute> attrIterator = attrs.iterator(); 2518 for (int i=0; i < attributes.length; i++) 2519 { 2520 attributes[i] = attrIterator.next(); 2521 } 2522 2523 return new LDIFAddChangeRecord(dn, attributes, controls); 2524 } 2525 else 2526 { 2527 throw new LDIFException(ERR_READ_CR_NO_ATTRIBUTES.get(firstLineNumber), 2528 firstLineNumber, true, ldifLines, null); 2529 } 2530 } 2531 else if (lowerChangeType.equals("delete")) 2532 { 2533 // There shouldn't be any more data. If there is, then that's an error. 2534 // Otherwise, we can just return the delete change record with what we 2535 // already know. 
2536 if (iterator.hasNext()) 2537 { 2538 throw new LDIFException( 2539 ERR_READ_CR_EXTRA_DELETE_DATA.get(firstLineNumber), 2540 firstLineNumber, true, ldifLines, null); 2541 } 2542 else 2543 { 2544 return new LDIFDeleteChangeRecord(dn, controls); 2545 } 2546 } 2547 else if (lowerChangeType.equals("modify")) 2548 { 2549 // There must be at least one more line. If not, then that's an error. 2550 // Otherwise, parse the rest of the data as a set of modifications. 2551 if (iterator.hasNext()) 2552 { 2553 final Modification[] mods = parseModifications(dn, 2554 unparsedRecord.getTrailingSpaceBehavior(), ldifLines, iterator, 2555 firstLineNumber, schema); 2556 return new LDIFModifyChangeRecord(dn, mods, controls); 2557 } 2558 else 2559 { 2560 throw new LDIFException(ERR_READ_CR_NO_MODS.get(firstLineNumber), 2561 firstLineNumber, true, ldifLines, null); 2562 } 2563 } 2564 else if (lowerChangeType.equals("moddn") || 2565 lowerChangeType.equals("modrdn")) 2566 { 2567 // There must be at least one more line. If not, then that's an error. 2568 // Otherwise, parse the rest of the data as a set of modifications. 2569 if (iterator.hasNext()) 2570 { 2571 return parseModifyDNChangeRecord(ldifLines, iterator, dn, controls, 2572 unparsedRecord.getTrailingSpaceBehavior(), firstLineNumber); 2573 } 2574 else 2575 { 2576 throw new LDIFException(ERR_READ_CR_NO_NEWRDN.get(firstLineNumber), 2577 firstLineNumber, true, ldifLines, null); 2578 } 2579 } 2580 else 2581 { 2582 throw new LDIFException(ERR_READ_CR_INVALID_CT.get(changeType, 2583 firstLineNumber), 2584 firstLineNumber, true, ldifLines, null); 2585 } 2586 } 2587 2588 2589 2590 /** 2591 * Decodes information about a control from the provided line. 2592 * 2593 * @param line The line to process. 2594 * @param colonPos The position of the colon that separates the 2595 * control token string from tbe encoded control. 2596 * @param firstLineNumber The line number for the start of the record. 
2597 * @param ldifLines The lines that comprise the LDIF representation 2598 * of the full record being parsed. 2599 * @param relativeBasePath The base path that will be prepended to relative 2600 * paths in order to obtain an absolute path. 2601 * 2602 * @return The decoded control. 2603 * 2604 * @throws LDIFException If a problem is encountered while trying to decode 2605 * the changetype. 2606 */ 2607 private static Control decodeControl(final StringBuilder line, 2608 final int colonPos, 2609 final long firstLineNumber, 2610 final ArrayList<StringBuilder> ldifLines, 2611 final String relativeBasePath) 2612 throws LDIFException 2613 { 2614 final String controlString; 2615 int length = line.length(); 2616 if (length == (colonPos+1)) 2617 { 2618 // The colon was the last character on the line. This is not 2619 // acceptable. 2620 throw new LDIFException( 2621 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber), 2622 firstLineNumber, true, ldifLines, null); 2623 } 2624 else if (line.charAt(colonPos+1) == ':') 2625 { 2626 // Skip over any spaces leading up to the value, and then the rest of 2627 // the string is the base64-encoded control representation. This is 2628 // unusual and unnecessary, but is nevertheless acceptable. 
2629 int pos = colonPos+2; 2630 while ((pos < length) && (line.charAt(pos) == ' ')) 2631 { 2632 pos++; 2633 } 2634 2635 try 2636 { 2637 final byte[] controlBytes = Base64.decode(line.substring(pos)); 2638 controlString = new String(controlBytes, "UTF-8"); 2639 } 2640 catch (final ParseException pe) 2641 { 2642 debugException(pe); 2643 throw new LDIFException( 2644 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get( 2645 firstLineNumber, pe.getMessage()), 2646 firstLineNumber, true, ldifLines, pe); 2647 } 2648 catch (final Exception e) 2649 { 2650 debugException(e); 2651 throw new LDIFException( 2652 ERR_READ_CANNOT_BASE64_DECODE_CONTROL.get(firstLineNumber, e), 2653 firstLineNumber, true, ldifLines, e); 2654 } 2655 } 2656 else 2657 { 2658 // Skip over any spaces leading up to the value, and then the rest of 2659 // the string is the encoded control. 2660 int pos = colonPos+1; 2661 while ((pos < length) && (line.charAt(pos) == ' ')) 2662 { 2663 pos++; 2664 } 2665 2666 controlString = line.substring(pos); 2667 } 2668 2669 // If the resulting control definition is empty, then that's invalid. 2670 if (controlString.length() == 0) 2671 { 2672 throw new LDIFException( 2673 ERR_READ_CONTROL_LINE_NO_CONTROL_VALUE.get(firstLineNumber), 2674 firstLineNumber, true, ldifLines, null); 2675 } 2676 2677 2678 // The first element of the control must be the OID, and it must be followed 2679 // by a space (to separate it from the criticality), a colon (to separate it 2680 // from the value and indicate a default criticality of false), or the end 2681 // of the line (to indicate a default criticality of false and no value). 2682 String oid = null; 2683 boolean hasCriticality = false; 2684 boolean hasValue = false; 2685 int pos = 0; 2686 length = controlString.length(); 2687 while (pos < length) 2688 { 2689 final char c = controlString.charAt(pos); 2690 if (c == ':') 2691 { 2692 // This indicates that there is no criticality and that the value 2693 // immediately follows the OID. 
2694 oid = controlString.substring(0, pos++); 2695 hasValue = true; 2696 break; 2697 } 2698 else if (c == ' ') 2699 { 2700 // This indicates that there is a criticality. We don't know anything 2701 // about the presence of a value yet. 2702 oid = controlString.substring(0, pos++); 2703 hasCriticality = true; 2704 break; 2705 } 2706 else 2707 { 2708 pos++; 2709 } 2710 } 2711 2712 if (oid == null) 2713 { 2714 // This indicates that the string representation of the control is only 2715 // the OID. 2716 return new Control(controlString, false); 2717 } 2718 2719 2720 // See if we need to read the criticality. If so, then do so now. 2721 // Otherwise, assume a default criticality of false. 2722 final boolean isCritical; 2723 if (hasCriticality) 2724 { 2725 // Skip over any spaces before the criticality. 2726 while (controlString.charAt(pos) == ' ') 2727 { 2728 pos++; 2729 } 2730 2731 // Read until we find a colon or the end of the string. 2732 final int criticalityStartPos = pos; 2733 while (pos < length) 2734 { 2735 final char c = controlString.charAt(pos); 2736 if (c == ':') 2737 { 2738 hasValue = true; 2739 break; 2740 } 2741 else 2742 { 2743 pos++; 2744 } 2745 } 2746 2747 final String criticalityString = 2748 toLowerCase(controlString.substring(criticalityStartPos, pos)); 2749 if (criticalityString.equals("true")) 2750 { 2751 isCritical = true; 2752 } 2753 else if (criticalityString.equals("false")) 2754 { 2755 isCritical = false; 2756 } 2757 else 2758 { 2759 throw new LDIFException( 2760 ERR_READ_CONTROL_LINE_INVALID_CRITICALITY.get(criticalityString, 2761 firstLineNumber), 2762 firstLineNumber, true, ldifLines, null); 2763 } 2764 2765 if (hasValue) 2766 { 2767 pos++; 2768 } 2769 } 2770 else 2771 { 2772 isCritical = false; 2773 } 2774 2775 // See if we need to read the value. If so, then do so now. It may be 2776 // a string, or it may be base64-encoded. It could conceivably even be read 2777 // from a URL. 
2778 final ASN1OctetString value; 2779 if (hasValue) 2780 { 2781 // The character immediately after the colon that precedes the value may 2782 // be one of the following: 2783 // - A second colon (optionally followed by a single space) to indicate 2784 // that the value is base64-encoded. 2785 // - A less-than symbol to indicate that the value should be read from a 2786 // location specified by a URL. 2787 // - A single space that precedes the non-base64-encoded value. 2788 // - The first character of the non-base64-encoded value. 2789 switch (controlString.charAt(pos)) 2790 { 2791 case ':': 2792 try 2793 { 2794 if (controlString.length() == (pos+1)) 2795 { 2796 value = new ASN1OctetString(); 2797 } 2798 else if (controlString.charAt(pos+1) == ' ') 2799 { 2800 value = new ASN1OctetString( 2801 Base64.decode(controlString.substring(pos+2))); 2802 } 2803 else 2804 { 2805 value = new ASN1OctetString( 2806 Base64.decode(controlString.substring(pos+1))); 2807 } 2808 } 2809 catch (final Exception e) 2810 { 2811 debugException(e); 2812 throw new LDIFException( 2813 ERR_READ_CONTROL_LINE_CANNOT_BASE64_DECODE_VALUE.get( 2814 firstLineNumber, getExceptionMessage(e)), 2815 firstLineNumber, true, ldifLines, e); 2816 } 2817 break; 2818 case '<': 2819 try 2820 { 2821 final String urlString; 2822 if (controlString.charAt(pos+1) == ' ') 2823 { 2824 urlString = controlString.substring(pos+2); 2825 } 2826 else 2827 { 2828 urlString = controlString.substring(pos+1); 2829 } 2830 value = new ASN1OctetString(retrieveURLBytes(urlString, 2831 relativeBasePath, firstLineNumber)); 2832 } 2833 catch (final Exception e) 2834 { 2835 debugException(e); 2836 throw new LDIFException( 2837 ERR_READ_CONTROL_LINE_CANNOT_RETRIEVE_VALUE_FROM_URL.get( 2838 firstLineNumber, getExceptionMessage(e)), 2839 firstLineNumber, true, ldifLines, e); 2840 } 2841 break; 2842 case ' ': 2843 value = new ASN1OctetString(controlString.substring(pos+1)); 2844 break; 2845 default: 2846 value = new 
ASN1OctetString(controlString.substring(pos)); 2847 break; 2848 } 2849 } 2850 else 2851 { 2852 value = null; 2853 } 2854 2855 return new Control(oid, isCritical, value); 2856 } 2857 2858 2859 2860 /** 2861 * Decodes the changetype element from the provided line. 2862 * 2863 * @param line The line to process. 2864 * @param colonPos The position of the colon that separates the 2865 * changetype string from its value. 2866 * @param firstLineNumber The line number for the start of the record. 2867 * @param ldifLines The lines that comprise the LDIF representation of 2868 * the full record being parsed. 2869 * 2870 * @return The decoded changetype string. 2871 * 2872 * @throws LDIFException If a problem is encountered while trying to decode 2873 * the changetype. 2874 */ 2875 private static String decodeChangeType(final StringBuilder line, 2876 final int colonPos, final long firstLineNumber, 2877 final ArrayList<StringBuilder> ldifLines) 2878 throws LDIFException 2879 { 2880 final int length = line.length(); 2881 if (length == (colonPos+1)) 2882 { 2883 // The colon was the last character on the line. This is not 2884 // acceptable. 2885 throw new LDIFException( 2886 ERR_READ_CT_LINE_NO_CT_VALUE.get(firstLineNumber), firstLineNumber, 2887 true, ldifLines, null); 2888 } 2889 else if (line.charAt(colonPos+1) == ':') 2890 { 2891 // Skip over any spaces leading up to the value, and then the rest of 2892 // the string is the base64-encoded changetype. This is unusual and 2893 // unnecessary, but is nevertheless acceptable. 
2894 int pos = colonPos+2; 2895 while ((pos < length) && (line.charAt(pos) == ' ')) 2896 { 2897 pos++; 2898 } 2899 2900 try 2901 { 2902 final byte[] changeTypeBytes = Base64.decode(line.substring(pos)); 2903 return new String(changeTypeBytes, "UTF-8"); 2904 } 2905 catch (final ParseException pe) 2906 { 2907 debugException(pe); 2908 throw new LDIFException( 2909 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, 2910 pe.getMessage()), 2911 firstLineNumber, true, ldifLines, pe); 2912 } 2913 catch (final Exception e) 2914 { 2915 debugException(e); 2916 throw new LDIFException( 2917 ERR_READ_CANNOT_BASE64_DECODE_CT.get(firstLineNumber, e), 2918 firstLineNumber, true, ldifLines, e); 2919 } 2920 } 2921 else 2922 { 2923 // Skip over any spaces leading up to the value, and then the rest of 2924 // the string is the changetype. 2925 int pos = colonPos+1; 2926 while ((pos < length) && (line.charAt(pos) == ' ')) 2927 { 2928 pos++; 2929 } 2930 2931 return line.substring(pos); 2932 } 2933 } 2934 2935 2936 2937 /** 2938 * Parses the data available through the provided iterator as a collection of 2939 * attributes suitable for use in an entry or an add change record. 2940 * 2941 * @param dn The DN of the record being read. 2942 * @param duplicateValueBehavior The behavior that should be exhibited if 2943 * the LDIF reader encounters an entry with 2944 * duplicate values. 2945 * @param trailingSpaceBehavior The behavior that should be exhibited when 2946 * encountering attribute values which are not 2947 * base64-encoded but contain trailing spaces. 2948 * @param schema The schema to use when parsing the 2949 * attributes, or {@code null} if none is 2950 * needed. 2951 * @param ldifLines The lines that comprise the LDIF 2952 * representation of the full record being 2953 * parsed. 2954 * @param iterator The iterator to use to access the attribute 2955 * lines. 
2956 * @param relativeBasePath The base path that will be prepended to 2957 * relative paths in order to obtain an 2958 * absolute path. 2959 * @param firstLineNumber The line number for the start of the 2960 * record. 2961 * 2962 * @return The collection of attributes that were read. 2963 * 2964 * @throws LDIFException If the provided LDIF data cannot be decoded as a 2965 * set of attributes. 2966 */ 2967 private static ArrayList<Attribute> parseAttributes(final String dn, 2968 final DuplicateValueBehavior duplicateValueBehavior, 2969 final TrailingSpaceBehavior trailingSpaceBehavior, final Schema schema, 2970 final ArrayList<StringBuilder> ldifLines, 2971 final Iterator<StringBuilder> iterator, final String relativeBasePath, 2972 final long firstLineNumber) 2973 throws LDIFException 2974 { 2975 final LinkedHashMap<String,Object> attributes = 2976 new LinkedHashMap<String,Object>(ldifLines.size()); 2977 while (iterator.hasNext()) 2978 { 2979 final StringBuilder line = iterator.next(); 2980 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 2981 final int colonPos = line.indexOf(":"); 2982 if (colonPos <= 0) 2983 { 2984 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber), 2985 firstLineNumber, true, ldifLines, null); 2986 } 2987 2988 final String attributeName = line.substring(0, colonPos); 2989 final String lowerName = toLowerCase(attributeName); 2990 2991 final MatchingRule matchingRule; 2992 if (schema == null) 2993 { 2994 matchingRule = CaseIgnoreStringMatchingRule.getInstance(); 2995 } 2996 else 2997 { 2998 matchingRule = 2999 MatchingRule.selectEqualityMatchingRule(attributeName, schema); 3000 } 3001 3002 Attribute attr; 3003 final LDIFAttribute ldifAttr; 3004 final Object attrObject = attributes.get(lowerName); 3005 if (attrObject == null) 3006 { 3007 attr = null; 3008 ldifAttr = null; 3009 } 3010 else 3011 { 3012 if (attrObject instanceof Attribute) 3013 { 3014 attr = (Attribute) attrObject; 3015 ldifAttr = new 
LDIFAttribute(attr.getName(), matchingRule, 3016 attr.getRawValues()[0]); 3017 attributes.put(lowerName, ldifAttr); 3018 } 3019 else 3020 { 3021 attr = null; 3022 ldifAttr = (LDIFAttribute) attrObject; 3023 } 3024 } 3025 3026 final int length = line.length(); 3027 if (length == (colonPos+1)) 3028 { 3029 // This means that the attribute has a zero-length value, which is 3030 // acceptable. 3031 if (attrObject == null) 3032 { 3033 attr = new Attribute(attributeName, matchingRule, ""); 3034 attributes.put(lowerName, attr); 3035 } 3036 else 3037 { 3038 try 3039 { 3040 if (! ldifAttr.addValue(new ASN1OctetString(), 3041 duplicateValueBehavior)) 3042 { 3043 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3044 { 3045 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3046 firstLineNumber, attributeName), firstLineNumber, true, 3047 ldifLines, null); 3048 } 3049 } 3050 } 3051 catch (LDAPException le) 3052 { 3053 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, 3054 firstLineNumber, attributeName, getExceptionMessage(le)), 3055 firstLineNumber, true, ldifLines, le); 3056 } 3057 } 3058 } 3059 else if (line.charAt(colonPos+1) == ':') 3060 { 3061 // Skip over any spaces leading up to the value, and then the rest of 3062 // the string is the base64-encoded attribute value. 3063 int pos = colonPos+2; 3064 while ((pos < length) && (line.charAt(pos) == ' ')) 3065 { 3066 pos++; 3067 } 3068 3069 try 3070 { 3071 final byte[] valueBytes = Base64.decode(line.substring(pos)); 3072 if (attrObject == null) 3073 { 3074 attr = new Attribute(attributeName, matchingRule, valueBytes); 3075 attributes.put(lowerName, attr); 3076 } 3077 else 3078 { 3079 try 3080 { 3081 if (! 
ldifAttr.addValue(new ASN1OctetString(valueBytes), 3082 duplicateValueBehavior)) 3083 { 3084 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3085 { 3086 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3087 firstLineNumber, attributeName), firstLineNumber, true, 3088 ldifLines, null); 3089 } 3090 } 3091 } 3092 catch (LDAPException le) 3093 { 3094 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, 3095 firstLineNumber, attributeName, getExceptionMessage(le)), 3096 firstLineNumber, true, ldifLines, le); 3097 } 3098 } 3099 } 3100 catch (final ParseException pe) 3101 { 3102 debugException(pe); 3103 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3104 attributeName, firstLineNumber, 3105 pe.getMessage()), 3106 firstLineNumber, true, ldifLines, pe); 3107 } 3108 } 3109 else if (line.charAt(colonPos+1) == '<') 3110 { 3111 // Skip over any spaces leading up to the value, and then the rest of 3112 // the string is a URL that indicates where to get the real content. 3113 // At the present time, we'll only support the file URLs. 3114 int pos = colonPos+2; 3115 while ((pos < length) && (line.charAt(pos) == ' ')) 3116 { 3117 pos++; 3118 } 3119 3120 final byte[] urlBytes; 3121 final String urlString = line.substring(pos); 3122 try 3123 { 3124 urlBytes = 3125 retrieveURLBytes(urlString, relativeBasePath, firstLineNumber); 3126 } 3127 catch (final Exception e) 3128 { 3129 debugException(e); 3130 throw new LDIFException( 3131 ERR_READ_URL_EXCEPTION.get(attributeName, urlString, 3132 firstLineNumber, e), 3133 firstLineNumber, true, ldifLines, e); 3134 } 3135 3136 if (attrObject == null) 3137 { 3138 attr = new Attribute(attributeName, matchingRule, urlBytes); 3139 attributes.put(lowerName, attr); 3140 } 3141 else 3142 { 3143 try 3144 { 3145 if (! 
ldifAttr.addValue(new ASN1OctetString(urlBytes), 3146 duplicateValueBehavior)) 3147 { 3148 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3149 { 3150 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3151 firstLineNumber, attributeName), firstLineNumber, true, 3152 ldifLines, null); 3153 } 3154 } 3155 } 3156 catch (final LDIFException le) 3157 { 3158 debugException(le); 3159 throw le; 3160 } 3161 catch (final Exception e) 3162 { 3163 debugException(e); 3164 throw new LDIFException( 3165 ERR_READ_URL_EXCEPTION.get(attributeName, urlString, 3166 firstLineNumber, e), 3167 firstLineNumber, true, ldifLines, e); 3168 } 3169 } 3170 } 3171 else 3172 { 3173 // Skip over any spaces leading up to the value, and then the rest of 3174 // the string is the value. 3175 int pos = colonPos+1; 3176 while ((pos < length) && (line.charAt(pos) == ' ')) 3177 { 3178 pos++; 3179 } 3180 3181 final String valueString = line.substring(pos); 3182 if (attrObject == null) 3183 { 3184 attr = new Attribute(attributeName, matchingRule, valueString); 3185 attributes.put(lowerName, attr); 3186 } 3187 else 3188 { 3189 try 3190 { 3191 if (! 
ldifAttr.addValue(new ASN1OctetString(valueString), 3192 duplicateValueBehavior)) 3193 { 3194 if (duplicateValueBehavior != DuplicateValueBehavior.STRIP) 3195 { 3196 throw new LDIFException(ERR_READ_DUPLICATE_VALUE.get(dn, 3197 firstLineNumber, attributeName), firstLineNumber, true, 3198 ldifLines, null); 3199 } 3200 } 3201 } 3202 catch (LDAPException le) 3203 { 3204 throw new LDIFException(ERR_READ_VALUE_SYNTAX_VIOLATION.get(dn, 3205 firstLineNumber, attributeName, getExceptionMessage(le)), 3206 firstLineNumber, true, ldifLines, le); 3207 } 3208 } 3209 } 3210 } 3211 3212 final ArrayList<Attribute> attrList = 3213 new ArrayList<Attribute>(attributes.size()); 3214 for (final Object o : attributes.values()) 3215 { 3216 if (o instanceof Attribute) 3217 { 3218 attrList.add((Attribute) o); 3219 } 3220 else 3221 { 3222 attrList.add(((LDIFAttribute) o).toAttribute()); 3223 } 3224 } 3225 3226 return attrList; 3227 } 3228 3229 3230 3231 /** 3232 * Retrieves the bytes that make up the file referenced by the given URL. 3233 * 3234 * @param urlString The string representation of the URL to retrieve. 3235 * @param relativeBasePath The base path that will be prepended to relative 3236 * paths in order to obtain an absolute path. 3237 * @param firstLineNumber The line number for the start of the record. 3238 * 3239 * @return The bytes contained in the specified file, or an empty array if 3240 * the specified file is empty. 3241 * 3242 * @throws LDIFException If the provided URL is malformed or references a 3243 * nonexistent file. 3244 * 3245 * @throws IOException If a problem is encountered while attempting to read 3246 * from the target file. 
3247 */ 3248 private static byte[] retrieveURLBytes(final String urlString, 3249 final String relativeBasePath, 3250 final long firstLineNumber) 3251 throws LDIFException, IOException 3252 { 3253 int pos; 3254 String path; 3255 final String lowerURLString = toLowerCase(urlString); 3256 if (lowerURLString.startsWith("file:/")) 3257 { 3258 pos = 6; 3259 while ((pos < urlString.length()) && (urlString.charAt(pos) == '/')) 3260 { 3261 pos++; 3262 } 3263 3264 path = urlString.substring(pos-1); 3265 } 3266 else if (lowerURLString.startsWith("file:")) 3267 { 3268 // A file: URL that doesn't include a slash will be interpreted as a 3269 // relative path. 3270 path = relativeBasePath + urlString.substring(5); 3271 } 3272 else 3273 { 3274 throw new LDIFException(ERR_READ_URL_INVALID_SCHEME.get(urlString), 3275 firstLineNumber, true); 3276 } 3277 3278 final File f = new File(path); 3279 if (! f.exists()) 3280 { 3281 throw new LDIFException( 3282 ERR_READ_URL_NO_SUCH_FILE.get(urlString, f.getAbsolutePath()), 3283 firstLineNumber, true); 3284 } 3285 3286 // In order to conserve memory, we'll only allow values to be read from 3287 // files no larger than 10 megabytes. 3288 final long fileSize = f.length(); 3289 if (fileSize > (10 * 1024 * 1024)) 3290 { 3291 throw new LDIFException( 3292 ERR_READ_URL_FILE_TOO_LARGE.get(urlString, f.getAbsolutePath(), 3293 (10*1024*1024)), 3294 firstLineNumber, true); 3295 } 3296 3297 int fileBytesRemaining = (int) fileSize; 3298 final byte[] fileData = new byte[(int) fileSize]; 3299 final FileInputStream fis = new FileInputStream(f); 3300 try 3301 { 3302 int fileBytesRead = 0; 3303 while (fileBytesRead < fileSize) 3304 { 3305 final int bytesRead = 3306 fis.read(fileData, fileBytesRead, fileBytesRemaining); 3307 if (bytesRead < 0) 3308 { 3309 // We hit the end of the file before we expected to. This shouldn't 3310 // happen unless the file size changed since we first looked at it, 3311 // which we won't allow. 
3312 throw new LDIFException( 3313 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, 3314 f.getAbsolutePath()), 3315 firstLineNumber, true); 3316 } 3317 3318 fileBytesRead += bytesRead; 3319 fileBytesRemaining -= bytesRead; 3320 } 3321 3322 if (fis.read() != -1) 3323 { 3324 // There is still more data to read. This shouldn't happen unless the 3325 // file size changed since we first looked at it, which we won't allow. 3326 throw new LDIFException( 3327 ERR_READ_URL_FILE_SIZE_CHANGED.get(urlString, f.getAbsolutePath()), 3328 firstLineNumber, true); 3329 } 3330 } 3331 finally 3332 { 3333 fis.close(); 3334 } 3335 3336 return fileData; 3337 } 3338 3339 3340 3341 /** 3342 * Parses the data available through the provided iterator into an array of 3343 * modifications suitable for use in a modify change record. 3344 * 3345 * @param dn The DN of the entry being parsed. 3346 * @param trailingSpaceBehavior The behavior that should be exhibited when 3347 * encountering attribute values which are not 3348 * base64-encoded but contain trailing spaces. 3349 * @param ldifLines The lines that comprise the LDIF 3350 * representation of the full record being 3351 * parsed. 3352 * @param iterator The iterator to use to access the 3353 * modification data. 3354 * @param firstLineNumber The line number for the start of the record. 3355 * @param schema The schema to use in processing. 3356 * 3357 * @return An array containing the modifications that were read. 3358 * 3359 * @throws LDIFException If the provided LDIF data cannot be decoded as a 3360 * set of modifications. 
3361 */ 3362 private static Modification[] parseModifications(final String dn, 3363 final TrailingSpaceBehavior trailingSpaceBehavior, 3364 final ArrayList<StringBuilder> ldifLines, 3365 final Iterator<StringBuilder> iterator, 3366 final long firstLineNumber, final Schema schema) 3367 throws LDIFException 3368 { 3369 final ArrayList<Modification> modList = 3370 new ArrayList<Modification>(ldifLines.size()); 3371 3372 while (iterator.hasNext()) 3373 { 3374 // The first line must start with "add:", "delete:", "replace:", or 3375 // "increment:" followed by an attribute name. 3376 StringBuilder line = iterator.next(); 3377 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3378 int colonPos = line.indexOf(":"); 3379 if (colonPos < 0) 3380 { 3381 throw new LDIFException(ERR_READ_MOD_CR_NO_MODTYPE.get(firstLineNumber), 3382 firstLineNumber, true, ldifLines, null); 3383 } 3384 3385 final ModificationType modType; 3386 final String modTypeStr = toLowerCase(line.substring(0, colonPos)); 3387 if (modTypeStr.equals("add")) 3388 { 3389 modType = ModificationType.ADD; 3390 } 3391 else if (modTypeStr.equals("delete")) 3392 { 3393 modType = ModificationType.DELETE; 3394 } 3395 else if (modTypeStr.equals("replace")) 3396 { 3397 modType = ModificationType.REPLACE; 3398 } 3399 else if (modTypeStr.equals("increment")) 3400 { 3401 modType = ModificationType.INCREMENT; 3402 } 3403 else 3404 { 3405 throw new LDIFException(ERR_READ_MOD_CR_INVALID_MODTYPE.get(modTypeStr, 3406 firstLineNumber), 3407 firstLineNumber, true, ldifLines, null); 3408 } 3409 3410 String attributeName; 3411 int length = line.length(); 3412 if (length == (colonPos+1)) 3413 { 3414 // The colon was the last character on the line. This is not 3415 // acceptable. 
3416 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get( 3417 firstLineNumber), 3418 firstLineNumber, true, ldifLines, null); 3419 } 3420 else if (line.charAt(colonPos+1) == ':') 3421 { 3422 // Skip over any spaces leading up to the value, and then the rest of 3423 // the string is the base64-encoded attribute name. 3424 int pos = colonPos+2; 3425 while ((pos < length) && (line.charAt(pos) == ' ')) 3426 { 3427 pos++; 3428 } 3429 3430 try 3431 { 3432 final byte[] dnBytes = Base64.decode(line.substring(pos)); 3433 attributeName = new String(dnBytes, "UTF-8"); 3434 } 3435 catch (final ParseException pe) 3436 { 3437 debugException(pe); 3438 throw new LDIFException( 3439 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get( 3440 firstLineNumber, pe.getMessage()), 3441 firstLineNumber, true, ldifLines, pe); 3442 } 3443 catch (final Exception e) 3444 { 3445 debugException(e); 3446 throw new LDIFException( 3447 ERR_READ_MOD_CR_MODTYPE_CANNOT_BASE64_DECODE_ATTR.get( 3448 firstLineNumber, e), 3449 firstLineNumber, true, ldifLines, e); 3450 } 3451 } 3452 else 3453 { 3454 // Skip over any spaces leading up to the value, and then the rest of 3455 // the string is the attribute name. 3456 int pos = colonPos+1; 3457 while ((pos < length) && (line.charAt(pos) == ' ')) 3458 { 3459 pos++; 3460 } 3461 3462 attributeName = line.substring(pos); 3463 } 3464 3465 if (attributeName.length() == 0) 3466 { 3467 throw new LDIFException(ERR_READ_MOD_CR_MODTYPE_NO_ATTR.get( 3468 firstLineNumber), 3469 firstLineNumber, true, ldifLines, null); 3470 } 3471 3472 3473 // The next zero or more lines may be the set of attribute values. Keep 3474 // reading until we reach the end of the iterator or until we find a line 3475 // with just a "-". 
3476 final ArrayList<ASN1OctetString> valueList = 3477 new ArrayList<ASN1OctetString>(ldifLines.size()); 3478 while (iterator.hasNext()) 3479 { 3480 line = iterator.next(); 3481 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3482 if (line.toString().equals("-")) 3483 { 3484 break; 3485 } 3486 3487 colonPos = line.indexOf(":"); 3488 if (colonPos < 0) 3489 { 3490 throw new LDIFException(ERR_READ_NO_ATTR_COLON.get(firstLineNumber), 3491 firstLineNumber, true, ldifLines, null); 3492 } 3493 else if (! line.substring(0, colonPos).equalsIgnoreCase(attributeName)) 3494 { 3495 // There are a couple of cases in which this might be acceptable: 3496 // - If the two names are logically equivalent, but have an alternate 3497 // name (or OID) for the target attribute type, or if there are 3498 // attribute options and the options are just in a different order. 3499 // - If this is the first value for the target attribute and the 3500 // alternate name includes a "binary" option that the original 3501 // attribute name did not have. In this case, all subsequent values 3502 // will also be required to have the binary option. 3503 final String alternateName = line.substring(0, colonPos); 3504 3505 3506 // Check to see if the base names are equivalent. 
3507 boolean baseNameEquivalent = false; 3508 final String expectedBaseName = Attribute.getBaseName(attributeName); 3509 final String alternateBaseName = Attribute.getBaseName(alternateName); 3510 if (alternateBaseName.equalsIgnoreCase(expectedBaseName)) 3511 { 3512 baseNameEquivalent = true; 3513 } 3514 else 3515 { 3516 if (schema != null) 3517 { 3518 final AttributeTypeDefinition expectedAT = 3519 schema.getAttributeType(expectedBaseName); 3520 final AttributeTypeDefinition alternateAT = 3521 schema.getAttributeType(alternateBaseName); 3522 if ((expectedAT != null) && (alternateAT != null) && 3523 expectedAT.equals(alternateAT)) 3524 { 3525 baseNameEquivalent = true; 3526 } 3527 } 3528 } 3529 3530 3531 // Check to see if the attribute options are equivalent. 3532 final Set<String> expectedOptions = 3533 Attribute.getOptions(attributeName); 3534 final Set<String> lowerExpectedOptions = 3535 new HashSet<String>(expectedOptions.size()); 3536 for (final String s : expectedOptions) 3537 { 3538 lowerExpectedOptions.add(toLowerCase(s)); 3539 } 3540 3541 final Set<String> alternateOptions = 3542 Attribute.getOptions(alternateName); 3543 final Set<String> lowerAlternateOptions = 3544 new HashSet<String>(alternateOptions.size()); 3545 for (final String s : alternateOptions) 3546 { 3547 lowerAlternateOptions.add(toLowerCase(s)); 3548 } 3549 3550 final boolean optionsEquivalent = 3551 lowerAlternateOptions.equals(lowerExpectedOptions); 3552 3553 3554 if (baseNameEquivalent && optionsEquivalent) 3555 { 3556 // This is fine. The two attribute descriptions are logically 3557 // equivalent. We'll continue using the attribute description that 3558 // was provided first. 
3559 } 3560 else if (valueList.isEmpty() && baseNameEquivalent && 3561 lowerAlternateOptions.remove("binary") && 3562 lowerAlternateOptions.equals(lowerExpectedOptions)) 3563 { 3564 // This means that the provided value is the first value for the 3565 // attribute, and that the only significant difference is that the 3566 // provided attribute description included an unexpected "binary" 3567 // option. We'll accept this, but will require any additional 3568 // values for this modification to also include the binary option, 3569 // and we'll use the binary option in the attribute that is 3570 // eventually created. 3571 attributeName = alternateName; 3572 } 3573 else 3574 { 3575 // This means that either the base names are different or the sets 3576 // of options are incompatible. This is not acceptable. 3577 throw new LDIFException(ERR_READ_MOD_CR_ATTR_MISMATCH.get( 3578 firstLineNumber, 3579 line.substring(0, colonPos), 3580 attributeName), 3581 firstLineNumber, true, ldifLines, null); 3582 } 3583 } 3584 3585 length = line.length(); 3586 final ASN1OctetString value; 3587 if (length == (colonPos+1)) 3588 { 3589 // The colon was the last character on the line. This is fine. 3590 value = new ASN1OctetString(); 3591 } 3592 else if (line.charAt(colonPos+1) == ':') 3593 { 3594 // Skip over any spaces leading up to the value, and then the rest of 3595 // the string is the base64-encoded value. This is unusual and 3596 // unnecessary, but is nevertheless acceptable. 
3597 int pos = colonPos+2; 3598 while ((pos < length) && (line.charAt(pos) == ' ')) 3599 { 3600 pos++; 3601 } 3602 3603 try 3604 { 3605 value = new ASN1OctetString(Base64.decode(line.substring(pos))); 3606 } 3607 catch (final ParseException pe) 3608 { 3609 debugException(pe); 3610 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3611 attributeName, firstLineNumber, pe.getMessage()), 3612 firstLineNumber, true, ldifLines, pe); 3613 } 3614 catch (final Exception e) 3615 { 3616 debugException(e); 3617 throw new LDIFException(ERR_READ_CANNOT_BASE64_DECODE_ATTR.get( 3618 firstLineNumber, e), 3619 firstLineNumber, true, ldifLines, e); 3620 } 3621 } 3622 else 3623 { 3624 // Skip over any spaces leading up to the value, and then the rest of 3625 // the string is the value. 3626 int pos = colonPos+1; 3627 while ((pos < length) && (line.charAt(pos) == ' ')) 3628 { 3629 pos++; 3630 } 3631 3632 value = new ASN1OctetString(line.substring(pos)); 3633 } 3634 3635 valueList.add(value); 3636 } 3637 3638 final ASN1OctetString[] values = new ASN1OctetString[valueList.size()]; 3639 valueList.toArray(values); 3640 3641 // If it's an add modification type, then there must be at least one 3642 // value. 3643 if ((modType.intValue() == ModificationType.ADD.intValue()) && 3644 (values.length == 0)) 3645 { 3646 throw new LDIFException(ERR_READ_MOD_CR_NO_ADD_VALUES.get(attributeName, 3647 firstLineNumber), 3648 firstLineNumber, true, ldifLines, null); 3649 } 3650 3651 // If it's an increment modification type, then there must be exactly one 3652 // value. 
3653 if ((modType.intValue() == ModificationType.INCREMENT.intValue()) && 3654 (values.length != 1)) 3655 { 3656 throw new LDIFException(ERR_READ_MOD_CR_INVALID_INCR_VALUE_COUNT.get( 3657 firstLineNumber, attributeName), 3658 firstLineNumber, true, ldifLines, null); 3659 } 3660 3661 modList.add(new Modification(modType, attributeName, values)); 3662 } 3663 3664 final Modification[] mods = new Modification[modList.size()]; 3665 modList.toArray(mods); 3666 return mods; 3667 } 3668 3669 3670 3671 /** 3672 * Parses the data available through the provided iterator as the body of a 3673 * modify DN change record (i.e., the newrdn, deleteoldrdn, and optional 3674 * newsuperior lines). 3675 * 3676 * @param ldifLines The lines that comprise the LDIF 3677 * representation of the full record being 3678 * parsed. 3679 * @param iterator The iterator to use to access the modify DN 3680 * data. 3681 * @param dn The current DN of the entry. 3682 * @param controls The set of controls to include in the change 3683 * record. 3684 * @param trailingSpaceBehavior The behavior that should be exhibited when 3685 * encountering attribute values which are not 3686 * base64-encoded but contain trailing spaces. 3687 * @param firstLineNumber The line number for the start of the record. 3688 * 3689 * @return The decoded modify DN change record. 3690 * 3691 * @throws LDIFException If the provided LDIF data cannot be decoded as a 3692 * modify DN change record. 3693 */ 3694 private static LDIFModifyDNChangeRecord parseModifyDNChangeRecord( 3695 final ArrayList<StringBuilder> ldifLines, 3696 final Iterator<StringBuilder> iterator, final String dn, 3697 final List<Control> controls, 3698 final TrailingSpaceBehavior trailingSpaceBehavior, 3699 final long firstLineNumber) 3700 throws LDIFException 3701 { 3702 // The next line must be the new RDN, and it must start with "newrdn:". 
3703 StringBuilder line = iterator.next(); 3704 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3705 int colonPos = line.indexOf(":"); 3706 if ((colonPos < 0) || 3707 (! line.substring(0, colonPos).equalsIgnoreCase("newrdn"))) 3708 { 3709 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_COLON.get( 3710 firstLineNumber), 3711 firstLineNumber, true, ldifLines, null); 3712 } 3713 3714 final String newRDN; 3715 int length = line.length(); 3716 if (length == (colonPos+1)) 3717 { 3718 // The colon was the last character on the line. This is not acceptable. 3719 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get( 3720 firstLineNumber), 3721 firstLineNumber, true, ldifLines, null); 3722 } 3723 else if (line.charAt(colonPos+1) == ':') 3724 { 3725 // Skip over any spaces leading up to the value, and then the rest of the 3726 // string is the base64-encoded new RDN. 3727 int pos = colonPos+2; 3728 while ((pos < length) && (line.charAt(pos) == ' ')) 3729 { 3730 pos++; 3731 } 3732 3733 try 3734 { 3735 final byte[] dnBytes = Base64.decode(line.substring(pos)); 3736 newRDN = new String(dnBytes, "UTF-8"); 3737 } 3738 catch (final ParseException pe) 3739 { 3740 debugException(pe); 3741 throw new LDIFException( 3742 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber, 3743 pe.getMessage()), 3744 firstLineNumber, true, ldifLines, pe); 3745 } 3746 catch (final Exception e) 3747 { 3748 debugException(e); 3749 throw new LDIFException( 3750 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWRDN.get(firstLineNumber, 3751 e), 3752 firstLineNumber, true, ldifLines, e); 3753 } 3754 } 3755 else 3756 { 3757 // Skip over any spaces leading up to the value, and then the rest of the 3758 // string is the new RDN. 
3759 int pos = colonPos+1; 3760 while ((pos < length) && (line.charAt(pos) == ' ')) 3761 { 3762 pos++; 3763 } 3764 3765 newRDN = line.substring(pos); 3766 } 3767 3768 if (newRDN.length() == 0) 3769 { 3770 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWRDN_VALUE.get( 3771 firstLineNumber), 3772 firstLineNumber, true, ldifLines, null); 3773 } 3774 3775 3776 // The next line must be the deleteOldRDN flag, and it must start with 3777 // 'deleteoldrdn:'. 3778 if (! iterator.hasNext()) 3779 { 3780 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get( 3781 firstLineNumber), 3782 firstLineNumber, true, ldifLines, null); 3783 } 3784 3785 line = iterator.next(); 3786 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3787 colonPos = line.indexOf(":"); 3788 if ((colonPos < 0) || 3789 (! line.substring(0, colonPos).equalsIgnoreCase("deleteoldrdn"))) 3790 { 3791 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_COLON.get( 3792 firstLineNumber), 3793 firstLineNumber, true, ldifLines, null); 3794 } 3795 3796 final String deleteOldRDNStr; 3797 length = line.length(); 3798 if (length == (colonPos+1)) 3799 { 3800 // The colon was the last character on the line. This is not acceptable. 3801 throw new LDIFException(ERR_READ_MODDN_CR_NO_DELOLDRDN_VALUE.get( 3802 firstLineNumber), 3803 firstLineNumber, true, ldifLines, null); 3804 } 3805 else if (line.charAt(colonPos+1) == ':') 3806 { 3807 // Skip over any spaces leading up to the value, and then the rest of the 3808 // string is the base64-encoded value. This is unusual and 3809 // unnecessary, but is nevertheless acceptable. 
3810 int pos = colonPos+2; 3811 while ((pos < length) && (line.charAt(pos) == ' ')) 3812 { 3813 pos++; 3814 } 3815 3816 try 3817 { 3818 final byte[] changeTypeBytes = Base64.decode(line.substring(pos)); 3819 deleteOldRDNStr = new String(changeTypeBytes, "UTF-8"); 3820 } 3821 catch (final ParseException pe) 3822 { 3823 debugException(pe); 3824 throw new LDIFException( 3825 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get( 3826 firstLineNumber, pe.getMessage()), 3827 firstLineNumber, true, ldifLines, pe); 3828 } 3829 catch (final Exception e) 3830 { 3831 debugException(e); 3832 throw new LDIFException( 3833 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_DELOLDRDN.get( 3834 firstLineNumber, e), 3835 firstLineNumber, true, ldifLines, e); 3836 } 3837 } 3838 else 3839 { 3840 // Skip over any spaces leading up to the value, and then the rest of the 3841 // string is the value. 3842 int pos = colonPos+1; 3843 while ((pos < length) && (line.charAt(pos) == ' ')) 3844 { 3845 pos++; 3846 } 3847 3848 deleteOldRDNStr = line.substring(pos); 3849 } 3850 3851 final boolean deleteOldRDN; 3852 if (deleteOldRDNStr.equals("0")) 3853 { 3854 deleteOldRDN = false; 3855 } 3856 else if (deleteOldRDNStr.equals("1")) 3857 { 3858 deleteOldRDN = true; 3859 } 3860 else if (deleteOldRDNStr.equalsIgnoreCase("false") || 3861 deleteOldRDNStr.equalsIgnoreCase("no")) 3862 { 3863 // This is technically illegal, but we'll allow it. 3864 deleteOldRDN = false; 3865 } 3866 else if (deleteOldRDNStr.equalsIgnoreCase("true") || 3867 deleteOldRDNStr.equalsIgnoreCase("yes")) 3868 { 3869 // This is also technically illegal, but we'll allow it. 3870 deleteOldRDN = false; 3871 } 3872 else 3873 { 3874 throw new LDIFException(ERR_READ_MODDN_CR_INVALID_DELOLDRDN.get( 3875 deleteOldRDNStr, firstLineNumber), 3876 firstLineNumber, true, ldifLines, null); 3877 } 3878 3879 3880 // If there is another line, then it must be the new superior DN and it must 3881 // start with "newsuperior:". If this is absent, then it's fine. 
3882 final String newSuperiorDN; 3883 if (iterator.hasNext()) 3884 { 3885 line = iterator.next(); 3886 handleTrailingSpaces(line, dn, firstLineNumber, trailingSpaceBehavior); 3887 colonPos = line.indexOf(":"); 3888 if ((colonPos < 0) || 3889 (! line.substring(0, colonPos).equalsIgnoreCase("newsuperior"))) 3890 { 3891 throw new LDIFException(ERR_READ_MODDN_CR_NO_NEWSUPERIOR_COLON.get( 3892 firstLineNumber), 3893 firstLineNumber, true, ldifLines, null); 3894 } 3895 3896 length = line.length(); 3897 if (length == (colonPos+1)) 3898 { 3899 // The colon was the last character on the line. This is fine. 3900 newSuperiorDN = ""; 3901 } 3902 else if (line.charAt(colonPos+1) == ':') 3903 { 3904 // Skip over any spaces leading up to the value, and then the rest of 3905 // the string is the base64-encoded new superior DN. 3906 int pos = colonPos+2; 3907 while ((pos < length) && (line.charAt(pos) == ' ')) 3908 { 3909 pos++; 3910 } 3911 3912 try 3913 { 3914 final byte[] dnBytes = Base64.decode(line.substring(pos)); 3915 newSuperiorDN = new String(dnBytes, "UTF-8"); 3916 } 3917 catch (final ParseException pe) 3918 { 3919 debugException(pe); 3920 throw new LDIFException( 3921 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get( 3922 firstLineNumber, pe.getMessage()), 3923 firstLineNumber, true, ldifLines, pe); 3924 } 3925 catch (final Exception e) 3926 { 3927 debugException(e); 3928 throw new LDIFException( 3929 ERR_READ_MODDN_CR_CANNOT_BASE64_DECODE_NEWSUPERIOR.get( 3930 firstLineNumber, e), 3931 firstLineNumber, true, ldifLines, e); 3932 } 3933 } 3934 else 3935 { 3936 // Skip over any spaces leading up to the value, and then the rest of 3937 // the string is the new superior DN. 3938 int pos = colonPos+1; 3939 while ((pos < length) && (line.charAt(pos) == ' ')) 3940 { 3941 pos++; 3942 } 3943 3944 newSuperiorDN = line.substring(pos); 3945 } 3946 } 3947 else 3948 { 3949 newSuperiorDN = null; 3950 } 3951 3952 3953 // There must not be any more lines. 
3954 if (iterator.hasNext()) 3955 { 3956 throw new LDIFException(ERR_READ_CR_EXTRA_MODDN_DATA.get(firstLineNumber), 3957 firstLineNumber, true, ldifLines, null); 3958 } 3959 3960 return new LDIFModifyDNChangeRecord(dn, newRDN, deleteOldRDN, 3961 newSuperiorDN, controls); 3962 } 3963 3964 3965 3966 /** 3967 * Examines the line contained in the provided buffer to determine whether it 3968 * may contain one or more illegal trailing spaces. If it does, then those 3969 * spaces will either be stripped out or an exception will be thrown to 3970 * indicate that they are illegal. 3971 * 3972 * @param buffer The buffer to be examined. 3973 * @param dn The DN of the LDIF record being parsed. It 3974 * may be {@code null} if the DN is not yet 3975 * known (e.g., because the provided line is 3976 * expected to contain that DN). 3977 * @param firstLineNumber The approximate line number in the LDIF 3978 * source on which the LDIF record begins. 3979 * @param trailingSpaceBehavior The behavior that should be exhibited when 3980 * encountering attribute values which are not 3981 * base64-encoded but contain trailing spaces. 3982 * 3983 * @throws LDIFException If the line contained in the provided buffer ends 3984 * with one or more illegal trailing spaces and 3985 * {@code stripTrailingSpaces} was provided with a 3986 * value of {@code false}. 
3987 */ 3988 private static void handleTrailingSpaces(final StringBuilder buffer, 3989 final String dn, final long firstLineNumber, 3990 final TrailingSpaceBehavior trailingSpaceBehavior) 3991 throws LDIFException 3992 { 3993 int pos = buffer.length() - 1; 3994 boolean trailingFound = false; 3995 while ((pos >= 0) && (buffer.charAt(pos) == ' ')) 3996 { 3997 trailingFound = true; 3998 pos--; 3999 } 4000 4001 if (trailingFound && (buffer.charAt(pos) != ':')) 4002 { 4003 switch (trailingSpaceBehavior) 4004 { 4005 case STRIP: 4006 buffer.setLength(pos+1); 4007 break; 4008 4009 case REJECT: 4010 if (dn == null) 4011 { 4012 throw new LDIFException( 4013 ERR_READ_ILLEGAL_TRAILING_SPACE_WITHOUT_DN.get(firstLineNumber, 4014 buffer.toString()), 4015 firstLineNumber, true); 4016 } 4017 else 4018 { 4019 throw new LDIFException( 4020 ERR_READ_ILLEGAL_TRAILING_SPACE_WITH_DN.get(dn, 4021 firstLineNumber, buffer.toString()), 4022 firstLineNumber, true); 4023 } 4024 4025 case RETAIN: 4026 default: 4027 // No action will be taken. 4028 break; 4029 } 4030 } 4031 } 4032 4033 4034 4035 /** 4036 * This represents an unparsed LDIFRecord. It stores the line number of the 4037 * first line of the record and each line of the record. 4038 */ 4039 private static final class UnparsedLDIFRecord 4040 { 4041 private final ArrayList<StringBuilder> lineList; 4042 private final long firstLineNumber; 4043 private final Exception failureCause; 4044 private final boolean isEOF; 4045 private final DuplicateValueBehavior duplicateValueBehavior; 4046 private final Schema schema; 4047 private final TrailingSpaceBehavior trailingSpaceBehavior; 4048 4049 4050 4051 /** 4052 * Constructor. 4053 * 4054 * @param lineList The lines that comprise the LDIF record. 4055 * @param duplicateValueBehavior The behavior to exhibit if the entry 4056 * contains duplicate attribute values. 
4057 * @param trailingSpaceBehavior Specifies the behavior to exhibit when 4058 * encountering trailing spaces in 4059 * non-base64-encoded attribute values. 4060 * @param schema The schema to use when parsing, if 4061 * applicable. 4062 * @param firstLineNumber The first line number of the LDIF record. 4063 */ 4064 private UnparsedLDIFRecord(final ArrayList<StringBuilder> lineList, 4065 final DuplicateValueBehavior duplicateValueBehavior, 4066 final TrailingSpaceBehavior trailingSpaceBehavior, 4067 final Schema schema, final long firstLineNumber) 4068 { 4069 this.lineList = lineList; 4070 this.firstLineNumber = firstLineNumber; 4071 this.duplicateValueBehavior = duplicateValueBehavior; 4072 this.trailingSpaceBehavior = trailingSpaceBehavior; 4073 this.schema = schema; 4074 4075 failureCause = null; 4076 isEOF = 4077 (firstLineNumber < 0) || ((lineList != null) && lineList.isEmpty()); 4078 } 4079 4080 4081 4082 /** 4083 * Constructor. 4084 * 4085 * @param failureCause The Exception thrown when reading from the input. 4086 */ 4087 private UnparsedLDIFRecord(final Exception failureCause) 4088 { 4089 this.failureCause = failureCause; 4090 4091 lineList = null; 4092 firstLineNumber = 0; 4093 duplicateValueBehavior = DuplicateValueBehavior.REJECT; 4094 trailingSpaceBehavior = TrailingSpaceBehavior.REJECT; 4095 schema = null; 4096 isEOF = false; 4097 } 4098 4099 4100 4101 /** 4102 * Return the lines that comprise the LDIF record. 4103 * 4104 * @return The lines that comprise the LDIF record. 4105 */ 4106 private ArrayList<StringBuilder> getLineList() 4107 { 4108 return lineList; 4109 } 4110 4111 4112 4113 /** 4114 * Retrieves the behavior to exhibit when encountering duplicate attribute 4115 * values. 4116 * 4117 * @return The behavior to exhibit when encountering duplicate attribute 4118 * values. 
4119 */ 4120 private DuplicateValueBehavior getDuplicateValueBehavior() 4121 { 4122 return duplicateValueBehavior; 4123 } 4124 4125 4126 4127 /** 4128 * Retrieves the behavior that should be exhibited when encountering 4129 * attribute values which are not base64-encoded but contain trailing 4130 * spaces. The LDIF specification strongly recommends that any value which 4131 * legitimately contains trailing spaces be base64-encoded, but the LDAP SDK 4132 * LDIF parser may be configured to automatically strip these spaces, to 4133 * preserve them, or to reject any entry or change record containing them. 4134 * 4135 * @return The behavior that should be exhibited when encountering 4136 * attribute values which are not base64-encoded but contain 4137 * trailing spaces. 4138 */ 4139 private TrailingSpaceBehavior getTrailingSpaceBehavior() 4140 { 4141 return trailingSpaceBehavior; 4142 } 4143 4144 4145 4146 /** 4147 * Retrieves the schema that should be used when parsing the record, if 4148 * applicable. 4149 * 4150 * @return The schema that should be used when parsing the record, or 4151 * {@code null} if none should be used. 4152 */ 4153 private Schema getSchema() 4154 { 4155 return schema; 4156 } 4157 4158 4159 4160 /** 4161 * Return the first line number of the LDIF record. 4162 * 4163 * @return The first line number of the LDIF record. 4164 */ 4165 private long getFirstLineNumber() 4166 { 4167 return firstLineNumber; 4168 } 4169 4170 4171 4172 /** 4173 * Return {@code true} iff the end of the input was reached. 4174 * 4175 * @return {@code true} iff the end of the input was reached. 4176 */ 4177 private boolean isEOF() 4178 { 4179 return isEOF; 4180 } 4181 4182 4183 4184 /** 4185 * Returns the reason that reading the record lines failed. This normally 4186 * is only non-null if something bad happened to the input stream (like 4187 * a disk read error). 4188 * 4189 * @return The reason that reading the record lines failed. 
4190 */ 4191 private Exception getFailureCause() 4192 { 4193 return failureCause; 4194 } 4195 } 4196 4197 4198 /** 4199 * When processing in asynchronous mode, this thread is responsible for 4200 * reading the raw unparsed records from the input and submitting them for 4201 * processing. 4202 */ 4203 private final class LineReaderThread 4204 extends Thread 4205 { 4206 /** 4207 * Constructor. 4208 */ 4209 private LineReaderThread() 4210 { 4211 super("Asynchronous LDIF line reader"); 4212 setDaemon(true); 4213 } 4214 4215 4216 4217 /** 4218 * Reads raw, unparsed records from the input and submits them for 4219 * processing until the input is finished or closed. 4220 */ 4221 @Override() 4222 public void run() 4223 { 4224 try 4225 { 4226 boolean stopProcessing = false; 4227 while (!stopProcessing) 4228 { 4229 UnparsedLDIFRecord unparsedRecord = null; 4230 try 4231 { 4232 unparsedRecord = readUnparsedRecord(); 4233 } 4234 catch (IOException e) 4235 { 4236 debugException(e); 4237 unparsedRecord = new UnparsedLDIFRecord(e); 4238 stopProcessing = true; 4239 } 4240 catch (Exception e) 4241 { 4242 debugException(e); 4243 unparsedRecord = new UnparsedLDIFRecord(e); 4244 } 4245 4246 try 4247 { 4248 asyncParser.submit(unparsedRecord); 4249 } 4250 catch (InterruptedException e) 4251 { 4252 debugException(e); 4253 // If this thread is interrupted, then someone wants us to stop 4254 // processing, so that's what we'll do. 4255 stopProcessing = true; 4256 } 4257 4258 if ((unparsedRecord == null) || (unparsedRecord.isEOF())) 4259 { 4260 stopProcessing = true; 4261 } 4262 } 4263 } 4264 finally 4265 { 4266 try 4267 { 4268 asyncParser.shutdown(); 4269 } 4270 catch (InterruptedException e) 4271 { 4272 debugException(e); 4273 } 4274 finally 4275 { 4276 asyncParsingComplete.set(true); 4277 } 4278 } 4279 } 4280 } 4281 4282 4283 4284 /** 4285 * Used to parse Records asynchronously. 
*/
  private final class RecordParser
          implements Processor<UnparsedLDIFRecord,LDIFRecord>
  {
    /**
     * Decodes the provided unparsed record and, when a matching translator is
     * configured, passes the decoded record through it.  An entry is handed
     * to the entry translator and a change record to the change record
     * translator, each together with the first line number of the record.  If
     * a translator returns {@code null}, the {@code SKIP_ENTRY} marker is
     * returned in place of the record.
     *
     * @param  input  The unparsed record to decode.
     *
     * @return  The decoded (and possibly translated) record, or the
     *          {@code SKIP_ENTRY} marker if a translator returned
     *          {@code null}.
     *
     * @throws  LDIFException  Propagated from decoding (or translating) the
     *                         record.
     */
    public LDIFRecord process(final UnparsedLDIFRecord input)
           throws LDIFException
    {
      LDIFRecord result = decodeRecord(input, relativeBasePath, schema);

      if ((entryTranslator != null) && (result instanceof Entry))
      {
        final LDIFRecord translatedEntry = entryTranslator.translate(
             (Entry) result, input.getFirstLineNumber());
        result = (translatedEntry == null) ? SKIP_ENTRY : translatedEntry;
      }

      // This check deliberately looks at the current value of the result, so
      // it also sees whatever the entry translator produced above.
      if ((changeRecordTranslator != null) &&
          (result instanceof LDIFChangeRecord))
      {
        final LDIFRecord translatedChange = changeRecordTranslator.translate(
             (LDIFChangeRecord) result, input.getFirstLineNumber());
        result = (translatedChange == null) ? SKIP_ENTRY : translatedChange;
      }

      return result;
    }
  }
}