001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.csv; 019 020import static org.apache.commons.csv.Constants.BACKSLASH; 021import static org.apache.commons.csv.Constants.COMMA; 022import static org.apache.commons.csv.Constants.COMMENT; 023import static org.apache.commons.csv.Constants.CR; 024import static org.apache.commons.csv.Constants.CRLF; 025import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR; 026import static org.apache.commons.csv.Constants.EMPTY; 027import static org.apache.commons.csv.Constants.LF; 028import static org.apache.commons.csv.Constants.PIPE; 029import static org.apache.commons.csv.Constants.SP; 030import static org.apache.commons.csv.Constants.TAB; 031 032import java.io.File; 033import java.io.FileOutputStream; 034import java.io.IOException; 035import java.io.OutputStreamWriter; 036import java.io.Reader; 037import java.io.Serializable; 038import java.io.StringWriter; 039import java.io.Writer; 040import java.nio.charset.Charset; 041import java.nio.file.Files; 042import java.nio.file.Path; 043import java.sql.ResultSet; 044import java.sql.ResultSetMetaData; 045import java.sql.SQLException; 046import java.util.Arrays; 
import java.util.HashSet;
import java.util.Set;

/**
 * Specifies the format of a CSV file and parses input.
 *
 * <h2>Using predefined formats</h2>
 *
 * <p>
 * You can use one of the predefined formats:
 * </p>
 *
 * <ul>
 * <li>{@link #DEFAULT}</li>
 * <li>{@link #EXCEL}</li>
 * <li>{@link #INFORMIX_UNLOAD}</li>
 * <li>{@link #INFORMIX_UNLOAD_CSV}</li>
 * <li>{@link #MYSQL}</li>
 * <li>{@link #RFC4180}</li>
 * <li>{@link #ORACLE}</li>
 * <li>{@link #POSTGRESQL_CSV}</li>
 * <li>{@link #POSTGRESQL_TEXT}</li>
 * <li>{@link #TDF}</li>
 * </ul>
 *
 * <p>
 * For example:
 * </p>
 *
 * <pre>
 * CSVParser parser = CSVFormat.EXCEL.parse(reader);
 * </pre>
 *
 * <p>
 * The {@link CSVParser} provides static methods to parse other input types, for example:
 * </p>
 *
 * <pre>
 * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL);
 * </pre>
 *
 * <h2>Defining formats</h2>
 *
 * <p>
 * You can extend a format by calling the {@code with} methods. For example:
 * </p>
 *
 * <pre>
 * CSVFormat.EXCEL.withNullString("N/A").withIgnoreSurroundingSpaces(true);
 * </pre>
 *
 * <h2>Defining column names</h2>
 *
 * <p>
 * To define the column names you want to use to access records, write:
 * </p>
 *
 * <pre>
 * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3");
 * </pre>
 *
 * <p>
 * Calling {@link #withHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and
 * assumes that your CSV source does not contain a first record that also defines column names.
 *
 * If it does, then you are overriding this metadata with your names and you should skip the first record by calling
 * {@link #withSkipHeaderRecord(boolean)} with {@code true}.
 * </p>
 *
 * <h2>Parsing</h2>
 *
 * <p>
 * You can use a format directly to parse a reader. For example, to parse an Excel file with a column header, write:
 * </p>
 *
 * <pre>
 * Reader in = ...;
 * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3").parse(in);
 * </pre>
 *
 * <p>
 * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}.
 * </p>
 *
 * <h2>Referencing columns safely</h2>
 *
 * <p>
 * If your source contains a header record, you can simplify your code and safely reference columns, by using
 * {@link #withHeader(String...)} with no arguments:
 * </p>
 *
 * <pre>
 * CSVFormat.EXCEL.withHeader();
 * </pre>
 *
 * <p>
 * This causes the parser to read the first record and use its values as column names.
 *
 * Then, call one of the {@link CSVRecord} get methods that take a String column name argument:
 * </p>
 *
 * <pre>
 * String value = record.get("Col1");
 * </pre>
 *
 * <p>
 * This makes your code impervious to changes in column order in the CSV file.
 * </p>
 *
 * <h2>Notes</h2>
 *
 * <p>
 * This class is immutable.
 * </p>
 */
public final class CSVFormat implements Serializable {

    /**
     * Predefines formats.
166 * 167 * @since 1.2 168 */ 169 public enum Predefined { 170 171 /** 172 * @see CSVFormat#DEFAULT 173 */ 174 Default(CSVFormat.DEFAULT), 175 176 /** 177 * @see CSVFormat#EXCEL 178 */ 179 Excel(CSVFormat.EXCEL), 180 181 /** 182 * @see CSVFormat#INFORMIX_UNLOAD 183 * @since 1.3 184 */ 185 InformixUnload(CSVFormat.INFORMIX_UNLOAD), 186 187 /** 188 * @see CSVFormat#INFORMIX_UNLOAD_CSV 189 * @since 1.3 190 */ 191 InformixUnloadCsv(CSVFormat.INFORMIX_UNLOAD_CSV), 192 193 /** 194 * @see CSVFormat#MONGODB_CSV 195 * @since 1.7 196 */ 197 MongoDBCsv(CSVFormat.MONGODB_CSV), 198 199 /** 200 * @see CSVFormat#MONGODB_TSV 201 * @since 1.7 202 */ 203 MongoDBTsv(CSVFormat.MONGODB_TSV), 204 205 /** 206 * @see CSVFormat#MYSQL 207 */ 208 MySQL(CSVFormat.MYSQL), 209 210 /** 211 * @see CSVFormat#ORACLE 212 */ 213 Oracle(CSVFormat.ORACLE), 214 215 /** 216 * @see CSVFormat#POSTGRESQL_CSV 217 * @since 1.5 218 */ 219 PostgreSQLCsv(CSVFormat.POSTGRESQL_CSV), 220 221 /** 222 * @see CSVFormat#POSTGRESQL_CSV 223 */ 224 PostgreSQLText(CSVFormat.POSTGRESQL_TEXT), 225 226 /** 227 * @see CSVFormat#RFC4180 228 */ 229 RFC4180(CSVFormat.RFC4180), 230 231 /** 232 * @see CSVFormat#TDF 233 */ 234 TDF(CSVFormat.TDF); 235 236 private final CSVFormat format; 237 238 Predefined(final CSVFormat format) { 239 this.format = format; 240 } 241 242 /** 243 * Gets the format. 244 * 245 * @return the format. 246 */ 247 public CSVFormat getFormat() { 248 return format; 249 } 250 } 251 252 /** 253 * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines. 
254 * 255 * <p> 256 * Settings are: 257 * </p> 258 * <ul> 259 * <li>{@code withDelimiter(',')}</li> 260 * <li>{@code withQuote('"')}</li> 261 * <li>{@code withRecordSeparator("\r\n")}</li> 262 * <li>{@code withIgnoreEmptyLines(true)}</li> 263 * <li>{@code withAllowDuplicateHeaderNames(true)}</li> 264 * </ul> 265 * 266 * @see Predefined#Default 267 */ 268 public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF, 269 null, null, null, false, false, false, false, false, false, true); 270 271 /** 272 * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is 273 * locale dependent, it might be necessary to customize this format to accommodate to your regional settings. 274 * 275 * <p> 276 * For example for parsing or generating a CSV file on a French system the following format will be used: 277 * </p> 278 * 279 * <pre> 280 * CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';'); 281 * </pre> 282 * 283 * <p> 284 * Settings are: 285 * </p> 286 * <ul> 287 * <li>{@code {@link #withDelimiter(char) withDelimiter(',')}}</li> 288 * <li>{@code {@link #withQuote(char) withQuote('"')}}</li> 289 * <li>{@code {@link #withRecordSeparator(String) withRecordSeparator("\r\n")}}</li> 290 * <li>{@code {@link #withIgnoreEmptyLines(boolean) withIgnoreEmptyLines(false)}}</li> 291 * <li>{@code {@link #withAllowMissingColumnNames(boolean) withAllowMissingColumnNames(true)}}</li> 292 * <li>{@code {@link #withAllowDuplicateHeaderNames(boolean) withAllowDuplicateHeaderNames(true)}}</li> 293 * </ul> 294 * <p> 295 * Note: This is currently like {@link #RFC4180} plus {@link #withAllowMissingColumnNames(boolean) 296 * withAllowMissingColumnNames(true)} and {@link #withIgnoreEmptyLines(boolean) withIgnoreEmptyLines(false)}. 
297 * </p> 298 * 299 * @see Predefined#Excel 300 */ 301 // @formatter:off 302 public static final CSVFormat EXCEL = DEFAULT 303 .withIgnoreEmptyLines(false) 304 .withAllowMissingColumnNames(); 305 // @formatter:on 306 307 /** 308 * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation. 309 * 310 * <p> 311 * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special 312 * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. 313 * </p> 314 * 315 * <p> 316 * Settings are: 317 * </p> 318 * <ul> 319 * <li>{@code withDelimiter(',')}</li> 320 * <li>{@code withEscape('\\')}</li> 321 * <li>{@code withQuote("\"")}</li> 322 * <li>{@code withRecordSeparator('\n')}</li> 323 * </ul> 324 * 325 * @see Predefined#MySQL 326 * @see <a href= 327 * "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm"> 328 * http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a> 329 * @since 1.3 330 */ 331 // @formatter:off 332 public static final CSVFormat INFORMIX_UNLOAD = DEFAULT 333 .withDelimiter(PIPE) 334 .withEscape(BACKSLASH) 335 .withQuote(DOUBLE_QUOTE_CHAR) 336 .withRecordSeparator(LF); 337 // @formatter:on 338 339 /** 340 * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.) 341 * 342 * <p> 343 * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special 344 * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. 
345 * </p> 346 * 347 * <p> 348 * Settings are: 349 * </p> 350 * <ul> 351 * <li>{@code withDelimiter(',')}</li> 352 * <li>{@code withQuote("\"")}</li> 353 * <li>{@code withRecordSeparator('\n')}</li> 354 * </ul> 355 * 356 * @see Predefined#MySQL 357 * @see <a href= 358 * "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm"> 359 * http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a> 360 * @since 1.3 361 */ 362 // @formatter:off 363 public static final CSVFormat INFORMIX_UNLOAD_CSV = DEFAULT 364 .withDelimiter(COMMA) 365 .withQuote(DOUBLE_QUOTE_CHAR) 366 .withRecordSeparator(LF); 367 // @formatter:on 368 369 /** 370 * Default MongoDB CSV format used by the {@code mongoexport} operation. 371 * <p> 372 * <b>Parsing is not supported yet.</b> 373 * </p> 374 * 375 * <p> 376 * This is a comma-delimited format. Values are double quoted only if needed and special characters are escaped with 377 * {@code '"'}. A header line with field names is expected. 378 * </p> 379 * 380 * <p> 381 * Settings are: 382 * </p> 383 * <ul> 384 * <li>{@code withDelimiter(',')}</li> 385 * <li>{@code withEscape('"')}</li> 386 * <li>{@code withQuote('"')}</li> 387 * <li>{@code withQuoteMode(QuoteMode.ALL_NON_NULL)}</li> 388 * <li>{@code withSkipHeaderRecord(false)}</li> 389 * </ul> 390 * 391 * @see Predefined#MongoDBCsv 392 * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command 393 * documentation</a> 394 * @since 1.7 395 */ 396 // @formatter:off 397 public static final CSVFormat MONGODB_CSV = DEFAULT 398 .withDelimiter(COMMA) 399 .withEscape(DOUBLE_QUOTE_CHAR) 400 .withQuote(DOUBLE_QUOTE_CHAR) 401 .withQuoteMode(QuoteMode.MINIMAL) 402 .withSkipHeaderRecord(false); 403 // @formatter:off 404 405 /** 406 * Default MongoDB TSV format used by the {@code mongoexport} operation. 
407 * <p> 408 * <b>Parsing is not supported yet.</b> 409 * </p> 410 * 411 * <p> 412 * This is a tab-delimited format. Values are double quoted only if needed and special 413 * characters are escaped with {@code '"'}. A header line with field names is expected. 414 * </p> 415 * 416 * <p> 417 * Settings are: 418 * </p> 419 * <ul> 420 * <li>{@code withDelimiter('\t')}</li> 421 * <li>{@code withEscape('"')}</li> 422 * <li>{@code withQuote('"')}</li> 423 * <li>{@code withQuoteMode(QuoteMode.ALL_NON_NULL)}</li> 424 * <li>{@code withSkipHeaderRecord(false)}</li> 425 * </ul> 426 * 427 * @see Predefined#MongoDBCsv 428 * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command 429 * documentation</a> 430 * @since 1.7 431 */ 432 // @formatter:off 433 public static final CSVFormat MONGODB_TSV = DEFAULT 434 .withDelimiter(TAB) 435 .withEscape(DOUBLE_QUOTE_CHAR) 436 .withQuote(DOUBLE_QUOTE_CHAR) 437 .withQuoteMode(QuoteMode.MINIMAL) 438 .withSkipHeaderRecord(false); 439 // @formatter:off 440 441 /** 442 * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations. 443 * 444 * <p> 445 * This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special 446 * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}. 
447 * </p> 448 * 449 * <p> 450 * Settings are: 451 * </p> 452 * <ul> 453 * <li>{@code withDelimiter('\t')}</li> 454 * <li>{@code withEscape('\\')}</li> 455 * <li>{@code withIgnoreEmptyLines(false)}</li> 456 * <li>{@code withQuote(null)}</li> 457 * <li>{@code withRecordSeparator('\n')}</li> 458 * <li>{@code withNullString("\\N")}</li> 459 * <li>{@code withQuoteMode(QuoteMode.ALL_NON_NULL)}</li> 460 * </ul> 461 * 462 * @see Predefined#MySQL 463 * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html"> http://dev.mysql.com/doc/refman/5.1/en/load 464 * -data.html</a> 465 */ 466 // @formatter:off 467 public static final CSVFormat MYSQL = DEFAULT 468 .withDelimiter(TAB) 469 .withEscape(BACKSLASH) 470 .withIgnoreEmptyLines(false) 471 .withQuote(null) 472 .withRecordSeparator(LF) 473 .withNullString("\\N") 474 .withQuoteMode(QuoteMode.ALL_NON_NULL); 475 // @formatter:off 476 477 /** 478 * Default Oracle format used by the SQL*Loader utility. 479 * 480 * <p> 481 * This is a comma-delimited format with the system line separator character as the record separator.Values are 482 * double quoted when needed and special characters are escaped with {@code '"'}. The default NULL string is 483 * {@code ""}. Values are trimmed. 
484 * </p> 485 * 486 * <p> 487 * Settings are: 488 * </p> 489 * <ul> 490 * <li>{@code withDelimiter(',') // default is {@code FIELDS TERMINATED BY ','}}</li> 491 * <li>{@code withEscape('\\')}</li> 492 * <li>{@code withIgnoreEmptyLines(false)}</li> 493 * <li>{@code withQuote('"') // default is {@code OPTIONALLY ENCLOSED BY '"'}}</li> 494 * <li>{@code withNullString("\\N")}</li> 495 * <li>{@code withTrim()}</li> 496 * <li>{@code withSystemRecordSeparator()}</li> 497 * <li>{@code withQuoteMode(QuoteMode.MINIMAL)}</li> 498 * </ul> 499 * 500 * @see Predefined#Oracle 501 * @see <a href="https://s.apache.org/CGXG">Oracle CSV Format Specification</a> 502 * @since 1.6 503 */ 504 // @formatter:off 505 public static final CSVFormat ORACLE = DEFAULT 506 .withDelimiter(COMMA) 507 .withEscape(BACKSLASH) 508 .withIgnoreEmptyLines(false) 509 .withQuote(DOUBLE_QUOTE_CHAR) 510 .withNullString("\\N") 511 .withTrim() 512 .withSystemRecordSeparator() 513 .withQuoteMode(QuoteMode.MINIMAL); 514 // @formatter:off 515 516 /** 517 * Default PostgreSQL CSV format used by the {@code COPY} operation. 518 * 519 * <p> 520 * This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special 521 * characters are escaped with {@code '"'}. The default NULL string is {@code ""}. 
522 * </p> 523 * 524 * <p> 525 * Settings are: 526 * </p> 527 * <ul> 528 * <li>{@code withDelimiter(',')}</li> 529 * <li>{@code withEscape('"')}</li> 530 * <li>{@code withIgnoreEmptyLines(false)}</li> 531 * <li>{@code withQuote('"')}</li> 532 * <li>{@code withRecordSeparator('\n')}</li> 533 * <li>{@code withNullString("")}</li> 534 * <li>{@code withQuoteMode(QuoteMode.ALL_NON_NULL)}</li> 535 * </ul> 536 * 537 * @see Predefined#MySQL 538 * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command 539 * documentation</a> 540 * @since 1.5 541 */ 542 // @formatter:off 543 public static final CSVFormat POSTGRESQL_CSV = DEFAULT 544 .withDelimiter(COMMA) 545 .withEscape(DOUBLE_QUOTE_CHAR) 546 .withIgnoreEmptyLines(false) 547 .withQuote(DOUBLE_QUOTE_CHAR) 548 .withRecordSeparator(LF) 549 .withNullString(EMPTY) 550 .withQuoteMode(QuoteMode.ALL_NON_NULL); 551 // @formatter:off 552 553 /** 554 * Default PostgreSQL text format used by the {@code COPY} operation. 555 * 556 * <p> 557 * This is a tab-delimited format with a LF character as the line separator. Values are double quoted and special 558 * characters are escaped with {@code '"'}. The default NULL string is {@code "\\N"}. 
559 * </p> 560 * 561 * <p> 562 * Settings are: 563 * </p> 564 * <ul> 565 * <li>{@code withDelimiter('\t')}</li> 566 * <li>{@code withEscape('\\')}</li> 567 * <li>{@code withIgnoreEmptyLines(false)}</li> 568 * <li>{@code withQuote('"')}</li> 569 * <li>{@code withRecordSeparator('\n')}</li> 570 * <li>{@code withNullString("\\N")}</li> 571 * <li>{@code withQuoteMode(QuoteMode.ALL_NON_NULL)}</li> 572 * </ul> 573 * 574 * @see Predefined#MySQL 575 * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command 576 * documentation</a> 577 * @since 1.5 578 */ 579 // @formatter:off 580 public static final CSVFormat POSTGRESQL_TEXT = DEFAULT 581 .withDelimiter(TAB) 582 .withEscape(BACKSLASH) 583 .withIgnoreEmptyLines(false) 584 .withQuote(DOUBLE_QUOTE_CHAR) 585 .withRecordSeparator(LF) 586 .withNullString("\\N") 587 .withQuoteMode(QuoteMode.ALL_NON_NULL); 588 // @formatter:off 589 590 /** 591 * Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>. 592 * 593 * <p> 594 * Settings are: 595 * </p> 596 * <ul> 597 * <li>{@code withDelimiter(',')}</li> 598 * <li>{@code withQuote('"')}</li> 599 * <li>{@code withRecordSeparator("\r\n")}</li> 600 * <li>{@code withIgnoreEmptyLines(false)}</li> 601 * </ul> 602 * 603 * @see Predefined#RFC4180 604 */ 605 public static final CSVFormat RFC4180 = DEFAULT.withIgnoreEmptyLines(false); 606 607 private static final long serialVersionUID = 1L; 608 609 /** 610 * Tab-delimited format. 
611 * 612 * <p> 613 * Settings are: 614 * </p> 615 * <ul> 616 * <li>{@code withDelimiter('\t')}</li> 617 * <li>{@code withQuote('"')}</li> 618 * <li>{@code withRecordSeparator("\r\n")}</li> 619 * <li>{@code withIgnoreSurroundingSpaces(true)}</li> 620 * </ul> 621 * 622 * @see Predefined#TDF 623 */ 624 // @formatter:off 625 public static final CSVFormat TDF = DEFAULT 626 .withDelimiter(TAB) 627 .withIgnoreSurroundingSpaces(); 628 // @formatter:on 629 630 /** 631 * Returns true if the given character is a line break character. 632 * 633 * @param c 634 * the character to check 635 * 636 * @return true if {@code c} is a line break character 637 */ 638 private static boolean isLineBreak(final char c) { 639 return c == LF || c == CR; 640 } 641 642 /** 643 * Returns true if the given character is a line break character. 644 * 645 * @param c 646 * the character to check, may be null 647 * 648 * @return true if {@code c} is a line break character (and not null) 649 */ 650 private static boolean isLineBreak(final Character c) { 651 return c != null && isLineBreak(c.charValue()); 652 } 653 654 /** 655 * Creates a new CSV format with the specified delimiter. 656 * 657 * <p> 658 * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized 659 * with null/false. 660 * </p> 661 * 662 * @param delimiter 663 * the char used for value separation, must not be a line break character 664 * @return a new CSV format. 665 * @throws IllegalArgumentException 666 * if the delimiter is a line break character 667 * 668 * @see #DEFAULT 669 * @see #RFC4180 670 * @see #MYSQL 671 * @see #EXCEL 672 * @see #TDF 673 */ 674 public static CSVFormat newFormat(final char delimiter) { 675 return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false, 676 false, false, false, false, true); 677 } 678 679 /** 680 * Gets one of the predefined formats from {@link CSVFormat.Predefined}. 
681 * 682 * @param format 683 * name 684 * @return one of the predefined formats 685 * @since 1.2 686 */ 687 public static CSVFormat valueOf(final String format) { 688 return CSVFormat.Predefined.valueOf(format).getFormat(); 689 } 690 691 private final boolean allowDuplicateHeaderNames; 692 693 private final boolean allowMissingColumnNames; 694 695 private final boolean autoFlush; 696 697 private final Character commentMarker; // null if commenting is disabled 698 699 private final char delimiter; 700 701 private final Character escapeCharacter; // null if escaping is disabled 702 703 private final String[] header; // array of header column names 704 705 private final String[] headerComments; // array of header comment lines 706 707 private final boolean ignoreEmptyLines; 708 709 private final boolean ignoreHeaderCase; // should ignore header names case 710 711 private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values? 712 713 private final String nullString; // the string to be used for null values 714 715 private final Character quoteCharacter; // null if quoting is disabled 716 717 private final String quotedNullString; 718 719 private final QuoteMode quoteMode; 720 721 private final String recordSeparator; // for outputs 722 723 private final boolean skipHeaderRecord; 724 725 private final boolean trailingDelimiter; 726 727 private final boolean trim; 728 729 /** 730 * Creates a customized CSV format. 
731 * 732 * @param delimiter 733 * the char used for value separation, must not be a line break character 734 * @param quoteChar 735 * the Character used as value encapsulation marker, may be {@code null} to disable 736 * @param quoteMode 737 * the quote mode 738 * @param commentStart 739 * the Character used for comment identification, may be {@code null} to disable 740 * @param escape 741 * the Character used to escape special characters in values, may be {@code null} to disable 742 * @param ignoreSurroundingSpaces 743 * {@code true} when whitespaces enclosing values should be ignored 744 * @param ignoreEmptyLines 745 * {@code true} when the parser should skip empty lines 746 * @param recordSeparator 747 * the line separator to use for output 748 * @param nullString 749 * the line separator to use for output 750 * @param headerComments 751 * the comments to be printed by the Printer before the actual CSV data 752 * @param header 753 * the header 754 * @param skipHeaderRecord 755 * TODO 756 * @param allowMissingColumnNames 757 * TODO 758 * @param ignoreHeaderCase 759 * TODO 760 * @param trim 761 * TODO 762 * @param trailingDelimiter 763 * TODO 764 * @param autoFlush 765 * @throws IllegalArgumentException 766 * if the delimiter is a line break character 767 */ 768 private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMode quoteMode, 769 final Character commentStart, final Character escape, final boolean ignoreSurroundingSpaces, 770 final boolean ignoreEmptyLines, final String recordSeparator, final String nullString, 771 final Object[] headerComments, final String[] header, final boolean skipHeaderRecord, 772 final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim, 773 final boolean trailingDelimiter, final boolean autoFlush, final boolean allowDuplicateHeaderNames) { 774 this.delimiter = delimiter; 775 this.quoteCharacter = quoteChar; 776 this.quoteMode = quoteMode; 777 this.commentMarker = commentStart; 
778 this.escapeCharacter = escape; 779 this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; 780 this.allowMissingColumnNames = allowMissingColumnNames; 781 this.ignoreEmptyLines = ignoreEmptyLines; 782 this.recordSeparator = recordSeparator; 783 this.nullString = nullString; 784 this.headerComments = toStringArray(headerComments); 785 this.header = header == null ? null : header.clone(); 786 this.skipHeaderRecord = skipHeaderRecord; 787 this.ignoreHeaderCase = ignoreHeaderCase; 788 this.trailingDelimiter = trailingDelimiter; 789 this.trim = trim; 790 this.autoFlush = autoFlush; 791 this.quotedNullString = quoteCharacter + nullString + quoteCharacter; 792 this.allowDuplicateHeaderNames = allowDuplicateHeaderNames; 793 validate(); 794 } 795 796 @Override 797 public boolean equals(final Object obj) { 798 if (this == obj) { 799 return true; 800 } 801 if (obj == null) { 802 return false; 803 } 804 if (getClass() != obj.getClass()) { 805 return false; 806 } 807 808 final CSVFormat other = (CSVFormat) obj; 809 if (delimiter != other.delimiter) { 810 return false; 811 } 812 if (trailingDelimiter != other.trailingDelimiter) { 813 return false; 814 } 815 if (autoFlush != other.autoFlush) { 816 return false; 817 } 818 if (trim != other.trim) { 819 return false; 820 } 821 if (allowMissingColumnNames != other.allowMissingColumnNames) { 822 return false; 823 } 824 if (allowDuplicateHeaderNames != other.allowDuplicateHeaderNames) { 825 return false; 826 } 827 if (ignoreHeaderCase != other.ignoreHeaderCase) { 828 return false; 829 } 830 if (quoteMode != other.quoteMode) { 831 return false; 832 } 833 if (quoteCharacter == null) { 834 if (other.quoteCharacter != null) { 835 return false; 836 } 837 } else if (!quoteCharacter.equals(other.quoteCharacter)) { 838 return false; 839 } 840 if (commentMarker == null) { 841 if (other.commentMarker != null) { 842 return false; 843 } 844 } else if (!commentMarker.equals(other.commentMarker)) { 845 return false; 846 } 847 if (escapeCharacter 
== null) { 848 if (other.escapeCharacter != null) { 849 return false; 850 } 851 } else if (!escapeCharacter.equals(other.escapeCharacter)) { 852 return false; 853 } 854 if (nullString == null) { 855 if (other.nullString != null) { 856 return false; 857 } 858 } else if (!nullString.equals(other.nullString)) { 859 return false; 860 } 861 if (!Arrays.equals(header, other.header)) { 862 return false; 863 } 864 if (ignoreSurroundingSpaces != other.ignoreSurroundingSpaces) { 865 return false; 866 } 867 if (ignoreEmptyLines != other.ignoreEmptyLines) { 868 return false; 869 } 870 if (skipHeaderRecord != other.skipHeaderRecord) { 871 return false; 872 } 873 if (recordSeparator == null) { 874 if (other.recordSeparator != null) { 875 return false; 876 } 877 } else if (!recordSeparator.equals(other.recordSeparator)) { 878 return false; 879 } 880 if (!Arrays.equals(headerComments, other.headerComments)) { 881 return false; 882 } 883 return true; 884 } 885 886 /** 887 * Formats the specified values. 888 * 889 * @param values 890 * the values to format 891 * @return the formatted values 892 */ 893 public String format(final Object... values) { 894 final StringWriter out = new StringWriter(); 895 try (CSVPrinter csvPrinter = new CSVPrinter(out, this)) { 896 csvPrinter.printRecord(values); 897 return out.toString().trim(); 898 } catch (final IOException e) { 899 // should not happen because a StringWriter does not do IO. 900 throw new IllegalStateException(e); 901 } 902 } 903 904 /** 905 * Returns true if and only if duplicate names are allowed in the headers. 906 * 907 * @return whether duplicate header names are allowed 908 * @since 1.7 909 */ 910 public boolean getAllowDuplicateHeaderNames() { 911 return allowDuplicateHeaderNames; 912 } 913 914 /** 915 * Specifies whether missing column names are allowed when parsing the header line. 
916 * 917 * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an 918 * {@link IllegalArgumentException}. 919 */ 920 public boolean getAllowMissingColumnNames() { 921 return allowMissingColumnNames; 922 } 923 924 /** 925 * Returns whether to flush on close. 926 * 927 * @return whether to flush on close. 928 * @since 1.6 929 */ 930 public boolean getAutoFlush() { 931 return autoFlush; 932 } 933 934 /** 935 * Returns the character marking the start of a line comment. 936 * 937 * @return the comment start marker, may be {@code null} 938 */ 939 public Character getCommentMarker() { 940 return commentMarker; 941 } 942 943 /** 944 * Returns the character delimiting the values (typically ';', ',' or '\t'). 945 * 946 * @return the delimiter character 947 */ 948 public char getDelimiter() { 949 return delimiter; 950 } 951 952 /** 953 * Returns the escape character. 954 * 955 * @return the escape character, may be {@code null} 956 */ 957 public Character getEscapeCharacter() { 958 return escapeCharacter; 959 } 960 961 /** 962 * Returns a copy of the header array. 963 * 964 * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file 965 */ 966 public String[] getHeader() { 967 return header != null ? header.clone() : null; 968 } 969 970 /** 971 * Returns a copy of the header comment array. 972 * 973 * @return a copy of the header comment array; {@code null} if disabled. 974 */ 975 public String[] getHeaderComments() { 976 return headerComments != null ? headerComments.clone() : null; 977 } 978 979 /** 980 * Specifies whether empty lines between records are ignored when parsing input. 981 * 982 * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty 983 * records. 
984 */ 985 public boolean getIgnoreEmptyLines() { 986 return ignoreEmptyLines; 987 } 988 989 /** 990 * Specifies whether header names will be accessed ignoring case. 991 * 992 * @return {@code true} if header names cases are ignored, {@code false} if they are case sensitive. 993 * @since 1.3 994 */ 995 public boolean getIgnoreHeaderCase() { 996 return ignoreHeaderCase; 997 } 998 999 /** 1000 * Specifies whether spaces around values are ignored when parsing input. 1001 * 1002 * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value. 1003 */ 1004 public boolean getIgnoreSurroundingSpaces() { 1005 return ignoreSurroundingSpaces; 1006 } 1007 1008 /** 1009 * Gets the String to convert to and from {@code null}. 1010 * <ul> 1011 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading 1012 * records.</li> 1013 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> 1014 * </ul> 1015 * 1016 * @return the String to convert to and from {@code null}. No substitution occurs if {@code null} 1017 */ 1018 public String getNullString() { 1019 return nullString; 1020 } 1021 1022 /** 1023 * Returns the character used to encapsulate values containing special characters. 1024 * 1025 * @return the quoteChar character, may be {@code null} 1026 */ 1027 public Character getQuoteCharacter() { 1028 return quoteCharacter; 1029 } 1030 1031 /** 1032 * Returns the quote policy output fields. 1033 * 1034 * @return the quote policy 1035 */ 1036 public QuoteMode getQuoteMode() { 1037 return quoteMode; 1038 } 1039 1040 /** 1041 * Returns the record separator delimiting output records. 1042 * 1043 * @return the record separator 1044 */ 1045 public String getRecordSeparator() { 1046 return recordSeparator; 1047 } 1048 1049 /** 1050 * Returns whether to skip the header record. 1051 * 1052 * @return whether to skip the header record. 
1053 */ 1054 public boolean getSkipHeaderRecord() { 1055 return skipHeaderRecord; 1056 } 1057 1058 /** 1059 * Returns whether to add a trailing delimiter. 1060 * 1061 * @return whether to add a trailing delimiter. 1062 * @since 1.3 1063 */ 1064 public boolean getTrailingDelimiter() { 1065 return trailingDelimiter; 1066 } 1067 1068 /** 1069 * Returns whether to trim leading and trailing blanks. 1070 * This is used by {@link #print(Object, Appendable, boolean)} 1071 * Also by {@link CSVParser#addRecordValue(boolean)} 1072 * 1073 * @return whether to trim leading and trailing blanks. 1074 */ 1075 public boolean getTrim() { 1076 return trim; 1077 } 1078 1079 @Override 1080 public int hashCode() { 1081 final int prime = 31; 1082 int result = 1; 1083 1084 result = prime * result + delimiter; 1085 result = prime * result + ((quoteMode == null) ? 0 : quoteMode.hashCode()); 1086 result = prime * result + ((quoteCharacter == null) ? 0 : quoteCharacter.hashCode()); 1087 result = prime * result + ((commentMarker == null) ? 0 : commentMarker.hashCode()); 1088 result = prime * result + ((escapeCharacter == null) ? 0 : escapeCharacter.hashCode()); 1089 result = prime * result + ((nullString == null) ? 0 : nullString.hashCode()); 1090 result = prime * result + (ignoreSurroundingSpaces ? 1231 : 1237); 1091 result = prime * result + (ignoreHeaderCase ? 1231 : 1237); 1092 result = prime * result + (ignoreEmptyLines ? 1231 : 1237); 1093 result = prime * result + (skipHeaderRecord ? 1231 : 1237); 1094 result = prime * result + (allowDuplicateHeaderNames ? 1231 : 1237); 1095 result = prime * result + (trim ? 1231 : 1237); 1096 result = prime * result + (autoFlush ? 1231 : 1237); 1097 result = prime * result + (trailingDelimiter ? 1231 : 1237); 1098 result = prime * result + (allowMissingColumnNames ? 1231 : 1237); 1099 result = prime * result + ((recordSeparator == null) ? 
0 : recordSeparator.hashCode()); 1100 result = prime * result + Arrays.hashCode(header); 1101 result = prime * result + Arrays.hashCode(headerComments); 1102 return result; 1103 } 1104 1105 /** 1106 * Specifies whether comments are supported by this format. 1107 * 1108 * Note that the comment introducer character is only recognized at the start of a line. 1109 * 1110 * @return {@code true} is comments are supported, {@code false} otherwise 1111 */ 1112 public boolean isCommentMarkerSet() { 1113 return commentMarker != null; 1114 } 1115 1116 /** 1117 * Returns whether escape are being processed. 1118 * 1119 * @return {@code true} if escapes are processed 1120 */ 1121 public boolean isEscapeCharacterSet() { 1122 return escapeCharacter != null; 1123 } 1124 1125 /** 1126 * Returns whether a nullString has been defined. 1127 * 1128 * @return {@code true} if a nullString is defined 1129 */ 1130 public boolean isNullStringSet() { 1131 return nullString != null; 1132 } 1133 1134 /** 1135 * Returns whether a quoteChar has been defined. 1136 * 1137 * @return {@code true} if a quoteChar is defined 1138 */ 1139 public boolean isQuoteCharacterSet() { 1140 return quoteCharacter != null; 1141 } 1142 1143 /** 1144 * Parses the specified content. 1145 * 1146 * <p> 1147 * See also the various static parse methods on {@link CSVParser}. 1148 * </p> 1149 * 1150 * @param in 1151 * the input stream 1152 * @return a parser over a stream of {@link CSVRecord}s. 1153 * @throws IOException 1154 * If an I/O error occurs 1155 */ 1156 public CSVParser parse(final Reader in) throws IOException { 1157 return new CSVParser(in, this); 1158 } 1159 1160 /** 1161 * Prints to the specified output. 1162 * 1163 * <p> 1164 * See also {@link CSVPrinter}. 1165 * </p> 1166 * 1167 * @param out 1168 * the output. 1169 * @return a printer to an output. 1170 * @throws IOException 1171 * thrown if the optional header cannot be printed. 
1172 */ 1173 public CSVPrinter print(final Appendable out) throws IOException { 1174 return new CSVPrinter(out, this); 1175 } 1176 1177 /** 1178 * Prints to the specified output. 1179 * 1180 * <p> 1181 * See also {@link CSVPrinter}. 1182 * </p> 1183 * 1184 * @param out 1185 * the output. 1186 * @param charset 1187 * A charset. 1188 * @return a printer to an output. 1189 * @throws IOException 1190 * thrown if the optional header cannot be printed. 1191 * @since 1.5 1192 */ 1193 @SuppressWarnings("resource") 1194 public CSVPrinter print(final File out, final Charset charset) throws IOException { 1195 // The writer will be closed when close() is called. 1196 return new CSVPrinter(new OutputStreamWriter(new FileOutputStream(out), charset), this); 1197 } 1198 1199 /** 1200 * Prints the {@code value} as the next value on the line to {@code out}. The value will be escaped or encapsulated 1201 * as needed. Useful when one wants to avoid creating CSVPrinters. 1202 * Trims the value if {@link #getTrim()} is true 1203 * @param value 1204 * value to output. 1205 * @param out 1206 * where to print the value. 1207 * @param newRecord 1208 * if this a new record. 1209 * @throws IOException 1210 * If an I/O error occurs. 1211 * @since 1.4 1212 */ 1213 public void print(final Object value, final Appendable out, final boolean newRecord) throws IOException { 1214 // null values are considered empty 1215 // Only call CharSequence.toString() if you have to, helps GC-free use cases. 
1216 CharSequence charSequence; 1217 if (value == null) { 1218 // https://issues.apache.org/jira/browse/CSV-203 1219 if (null == nullString) { 1220 charSequence = EMPTY; 1221 } else { 1222 if (QuoteMode.ALL == quoteMode) { 1223 charSequence = quotedNullString; 1224 } else { 1225 charSequence = nullString; 1226 } 1227 } 1228 } else { 1229 if (value instanceof CharSequence) { 1230 charSequence = (CharSequence) value; 1231 } else if (value instanceof Reader) { 1232 print((Reader) value, out, newRecord); 1233 return; 1234 } else { 1235 charSequence = value.toString(); 1236 } 1237 } 1238 charSequence = getTrim() ? trim(charSequence) : charSequence; 1239 print(value, charSequence, out, newRecord); 1240 } 1241 1242 private void print(final Object object, final CharSequence value, final Appendable out, final boolean newRecord) 1243 throws IOException { 1244 final int offset = 0; 1245 final int len = value.length(); 1246 if (!newRecord) { 1247 out.append(getDelimiter()); 1248 } 1249 if (object == null) { 1250 out.append(value); 1251 } else if (isQuoteCharacterSet()) { 1252 // the original object is needed so can check for Number 1253 printWithQuotes(object, value, out, newRecord); 1254 } else if (isEscapeCharacterSet()) { 1255 printWithEscapes(value, out); 1256 } else { 1257 out.append(value, offset, len); 1258 } 1259 } 1260 1261 /** 1262 * Prints to the specified output. 1263 * 1264 * <p> 1265 * See also {@link CSVPrinter}. 1266 * </p> 1267 * 1268 * @param out 1269 * the output. 1270 * @param charset 1271 * A charset. 1272 * @return a printer to an output. 1273 * @throws IOException 1274 * thrown if the optional header cannot be printed. 
1275 * @since 1.5 1276 */ 1277 public CSVPrinter print(final Path out, final Charset charset) throws IOException { 1278 return print(Files.newBufferedWriter(out, charset)); 1279 } 1280 1281 private void print(final Reader reader, final Appendable out, final boolean newRecord) throws IOException { 1282 // Reader is never null 1283 if (!newRecord) { 1284 out.append(getDelimiter()); 1285 } 1286 if (isQuoteCharacterSet()) { 1287 printWithQuotes(reader, out); 1288 } else if (isEscapeCharacterSet()) { 1289 printWithEscapes(reader, out); 1290 } else if (out instanceof Writer) { 1291 IOUtils.copyLarge(reader, (Writer) out); 1292 } else { 1293 IOUtils.copy(reader, out); 1294 } 1295 1296 } 1297 1298 /** 1299 * Prints to the {@link System#out}. 1300 * 1301 * <p> 1302 * See also {@link CSVPrinter}. 1303 * </p> 1304 * 1305 * @return a printer to {@link System#out}. 1306 * @throws IOException 1307 * thrown if the optional header cannot be printed. 1308 * @since 1.5 1309 */ 1310 public CSVPrinter printer() throws IOException { 1311 return new CSVPrinter(System.out, this); 1312 } 1313 1314 /** 1315 * Outputs the trailing delimiter (if set) followed by the record separator (if set). 1316 * 1317 * @param out 1318 * where to write 1319 * @throws IOException 1320 * If an I/O error occurs 1321 * @since 1.4 1322 */ 1323 public void println(final Appendable out) throws IOException { 1324 if (getTrailingDelimiter()) { 1325 out.append(getDelimiter()); 1326 } 1327 if (recordSeparator != null) { 1328 out.append(recordSeparator); 1329 } 1330 } 1331 1332 /** 1333 * Prints the given {@code values} to {@code out} as a single record of delimiter separated values followed by the 1334 * record separator. 1335 * 1336 * <p> 1337 * The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record 1338 * separator to the output after printing the record, so there is no need to call {@link #println(Appendable)}. 
1339 * </p> 1340 * 1341 * @param out 1342 * where to write. 1343 * @param values 1344 * values to output. 1345 * @throws IOException 1346 * If an I/O error occurs. 1347 * @since 1.4 1348 */ 1349 public void printRecord(final Appendable out, final Object... values) throws IOException { 1350 for (int i = 0; i < values.length; i++) { 1351 print(values[i], out, i == 0); 1352 } 1353 println(out); 1354 } 1355 1356 /* 1357 * Note: must only be called if escaping is enabled, otherwise will generate NPE 1358 */ 1359 private void printWithEscapes(final CharSequence value, final Appendable out) throws IOException { 1360 int start = 0; 1361 int pos = 0; 1362 final int len = value.length(); 1363 final int end = len; 1364 1365 final char delim = getDelimiter(); 1366 final char escape = getEscapeCharacter().charValue(); 1367 1368 while (pos < end) { 1369 char c = value.charAt(pos); 1370 if (c == CR || c == LF || c == delim || c == escape) { 1371 // write out segment up until this char 1372 if (pos > start) { 1373 out.append(value, start, pos); 1374 } 1375 if (c == LF) { 1376 c = 'n'; 1377 } else if (c == CR) { 1378 c = 'r'; 1379 } 1380 1381 out.append(escape); 1382 out.append(c); 1383 1384 start = pos + 1; // start on the current char after this one 1385 } 1386 pos++; 1387 } 1388 1389 // write last segment 1390 if (pos > start) { 1391 out.append(value, start, pos); 1392 } 1393 } 1394 1395 private void printWithEscapes(final Reader reader, final Appendable out) throws IOException { 1396 int start = 0; 1397 int pos = 0; 1398 1399 final char delim = getDelimiter(); 1400 final char escape = getEscapeCharacter().charValue(); 1401 final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); 1402 1403 int c; 1404 while (-1 != (c = reader.read())) { 1405 builder.append((char) c); 1406 if (c == CR || c == LF || c == delim || c == escape) { 1407 // write out segment up until this char 1408 if (pos > start) { 1409 out.append(builder.substring(start, pos)); 1410 
builder.setLength(0); 1411 } 1412 if (c == LF) { 1413 c = 'n'; 1414 } else if (c == CR) { 1415 c = 'r'; 1416 } 1417 1418 out.append(escape); 1419 out.append((char) c); 1420 1421 start = pos + 1; // start on the current char after this one 1422 } 1423 pos++; 1424 } 1425 1426 // write last segment 1427 if (pos > start) { 1428 out.append(builder.substring(start, pos)); 1429 } 1430 } 1431 1432 /* 1433 * Note: must only be called if quoting is enabled, otherwise will generate NPE 1434 */ 1435 // the original object is needed so can check for Number 1436 private void printWithQuotes(final Object object, final CharSequence value, final Appendable out, 1437 final boolean newRecord) throws IOException { 1438 boolean quote = false; 1439 int start = 0; 1440 int pos = 0; 1441 final int len = value.length(); 1442 final int end = len; 1443 1444 final char delimChar = getDelimiter(); 1445 final char quoteChar = getQuoteCharacter().charValue(); 1446 // If escape char not specified, default to the quote char 1447 // This avoids having to keep checking whether there is an escape character 1448 // at the cost of checking against quote twice 1449 final char escapeChar = isEscapeCharacterSet() ? getEscapeCharacter().charValue() : quoteChar; 1450 1451 QuoteMode quoteModePolicy = getQuoteMode(); 1452 if (quoteModePolicy == null) { 1453 quoteModePolicy = QuoteMode.MINIMAL; 1454 } 1455 switch (quoteModePolicy) { 1456 case ALL: 1457 case ALL_NON_NULL: 1458 quote = true; 1459 break; 1460 case NON_NUMERIC: 1461 quote = !(object instanceof Number); 1462 break; 1463 case NONE: 1464 // Use the existing escaping code 1465 printWithEscapes(value, out); 1466 return; 1467 case MINIMAL: 1468 if (len <= 0) { 1469 // always quote an empty token that is the first 1470 // on the line, as it may be the only thing on the 1471 // line. If it were not quoted in that case, 1472 // an empty line has no tokens. 
1473 if (newRecord) { 1474 quote = true; 1475 } 1476 } else { 1477 char c = value.charAt(pos); 1478 1479 if (c <= COMMENT) { 1480 // Some other chars at the start of a value caused the parser to fail, so for now 1481 // encapsulate if we start in anything less than '#'. We are being conservative 1482 // by including the default comment char too. 1483 quote = true; 1484 } else { 1485 while (pos < end) { 1486 c = value.charAt(pos); 1487 if (c == LF || c == CR || c == quoteChar || c == delimChar || c == escapeChar) { 1488 quote = true; 1489 break; 1490 } 1491 pos++; 1492 } 1493 1494 if (!quote) { 1495 pos = end - 1; 1496 c = value.charAt(pos); 1497 // Some other chars at the end caused the parser to fail, so for now 1498 // encapsulate if we end in anything less than ' ' 1499 if (c <= SP) { 1500 quote = true; 1501 } 1502 } 1503 } 1504 } 1505 1506 if (!quote) { 1507 // no encapsulation needed - write out the original value 1508 out.append(value, start, end); 1509 return; 1510 } 1511 break; 1512 default: 1513 throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy); 1514 } 1515 1516 if (!quote) { 1517 // no encapsulation needed - write out the original value 1518 out.append(value, start, end); 1519 return; 1520 } 1521 1522 // we hit something that needed encapsulation 1523 out.append(quoteChar); 1524 1525 // Pick up where we left off: pos should be positioned on the first character that caused 1526 // the need for encapsulation. 1527 while (pos < end) { 1528 final char c = value.charAt(pos); 1529 if (c == quoteChar || c == escapeChar) { 1530 // write out the chunk up until this point 1531 out.append(value, start, pos); 1532 out.append(escapeChar); // now output the escape 1533 start = pos; // and restart with the matched char 1534 } 1535 pos++; 1536 } 1537 1538 // write the last segment 1539 out.append(value, start, pos); 1540 out.append(quoteChar); 1541 } 1542 1543 /** 1544 * Always use quotes unless QuoteMode is NONE, so we not have to look ahead. 
1545 * 1546 * @throws IOException 1547 */ 1548 private void printWithQuotes(final Reader reader, final Appendable out) throws IOException { 1549 1550 if (getQuoteMode() == QuoteMode.NONE) { 1551 printWithEscapes(reader, out); 1552 return; 1553 } 1554 1555 int pos = 0; 1556 1557 final char quote = getQuoteCharacter().charValue(); 1558 final StringBuilder builder = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE); 1559 1560 out.append(quote); 1561 1562 int c; 1563 while (-1 != (c = reader.read())) { 1564 builder.append((char) c); 1565 if (c == quote) { 1566 // write out segment up until this char 1567 if (pos > 0) { 1568 out.append(builder.substring(0, pos)); 1569 builder.setLength(0); 1570 pos = -1; 1571 } 1572 1573 out.append(quote); 1574 out.append((char) c); 1575 } 1576 pos++; 1577 } 1578 1579 // write last segment 1580 if (pos > 0) { 1581 out.append(builder.substring(0, pos)); 1582 } 1583 1584 out.append(quote); 1585 } 1586 1587 @Override 1588 public String toString() { 1589 final StringBuilder sb = new StringBuilder(); 1590 sb.append("Delimiter=<").append(delimiter).append('>'); 1591 if (isEscapeCharacterSet()) { 1592 sb.append(' '); 1593 sb.append("Escape=<").append(escapeCharacter).append('>'); 1594 } 1595 if (isQuoteCharacterSet()) { 1596 sb.append(' '); 1597 sb.append("QuoteChar=<").append(quoteCharacter).append('>'); 1598 } 1599 if (quoteMode != null) { 1600 sb.append(' '); 1601 sb.append("QuoteMode=<").append(quoteMode).append('>'); 1602 } 1603 if (isCommentMarkerSet()) { 1604 sb.append(' '); 1605 sb.append("CommentStart=<").append(commentMarker).append('>'); 1606 } 1607 if (isNullStringSet()) { 1608 sb.append(' '); 1609 sb.append("NullString=<").append(nullString).append('>'); 1610 } 1611 if (recordSeparator != null) { 1612 sb.append(' '); 1613 sb.append("RecordSeparator=<").append(recordSeparator).append('>'); 1614 } 1615 if (getIgnoreEmptyLines()) { 1616 sb.append(" EmptyLines:ignored"); 1617 } 1618 if (getIgnoreSurroundingSpaces()) { 1619 sb.append(" 
SurroundingSpaces:ignored"); 1620 } 1621 if (getIgnoreHeaderCase()) { 1622 sb.append(" IgnoreHeaderCase:ignored"); 1623 } 1624 sb.append(" SkipHeaderRecord:").append(skipHeaderRecord); 1625 if (headerComments != null) { 1626 sb.append(' '); 1627 sb.append("HeaderComments:").append(Arrays.toString(headerComments)); 1628 } 1629 if (header != null) { 1630 sb.append(' '); 1631 sb.append("Header:").append(Arrays.toString(header)); 1632 } 1633 return sb.toString(); 1634 } 1635 1636 private String[] toStringArray(final Object[] values) { 1637 if (values == null) { 1638 return null; 1639 } 1640 final String[] strings = new String[values.length]; 1641 for (int i = 0; i < values.length; i++) { 1642 final Object value = values[i]; 1643 strings[i] = value == null ? null : value.toString(); 1644 } 1645 return strings; 1646 } 1647 1648 private CharSequence trim(final CharSequence charSequence) { 1649 if (charSequence instanceof String) { 1650 return ((String) charSequence).trim(); 1651 } 1652 final int count = charSequence.length(); 1653 int len = count; 1654 int pos = 0; 1655 1656 while (pos < len && charSequence.charAt(pos) <= SP) { 1657 pos++; 1658 } 1659 while (pos < len && charSequence.charAt(len - 1) <= SP) { 1660 len--; 1661 } 1662 return pos > 0 || len < count ? charSequence.subSequence(pos, len) : charSequence; 1663 } 1664 1665 /** 1666 * Verifies the consistency of the parameters and throws an IllegalArgumentException if necessary. 
1667 * 1668 * @throws IllegalArgumentException 1669 */ 1670 private void validate() throws IllegalArgumentException { 1671 if (isLineBreak(delimiter)) { 1672 throw new IllegalArgumentException("The delimiter cannot be a line break"); 1673 } 1674 1675 if (quoteCharacter != null && delimiter == quoteCharacter.charValue()) { 1676 throw new IllegalArgumentException( 1677 "The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')"); 1678 } 1679 1680 if (escapeCharacter != null && delimiter == escapeCharacter.charValue()) { 1681 throw new IllegalArgumentException( 1682 "The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')"); 1683 } 1684 1685 if (commentMarker != null && delimiter == commentMarker.charValue()) { 1686 throw new IllegalArgumentException( 1687 "The comment start character and the delimiter cannot be the same ('" + commentMarker + "')"); 1688 } 1689 1690 if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) { 1691 throw new IllegalArgumentException( 1692 "The comment start character and the quoteChar cannot be the same ('" + commentMarker + "')"); 1693 } 1694 1695 if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) { 1696 throw new IllegalArgumentException( 1697 "The comment start and the escape character cannot be the same ('" + commentMarker + "')"); 1698 } 1699 1700 if (escapeCharacter == null && quoteMode == QuoteMode.NONE) { 1701 throw new IllegalArgumentException("No quotes mode set but no escape character is set"); 1702 } 1703 1704 // validate header 1705 if (header != null && !allowDuplicateHeaderNames) { 1706 final Set<String> dupCheck = new HashSet<>(); 1707 for (final String hdr : header) { 1708 if (!dupCheck.add(hdr)) { 1709 throw new IllegalArgumentException( 1710 "The header contains a duplicate entry: '" + hdr + "' in " + Arrays.toString(header)); 1711 } 1712 } 1713 } 1714 } 1715 1716 /** 1717 * Returns a new {@code CSVFormat} that allows duplicate 
header names. 1718 * 1719 * @return a new {@code CSVFormat} that allows duplicate header names 1720 * @since 1.7 1721 */ 1722 public CSVFormat withAllowDuplicateHeaderNames() { 1723 return withAllowDuplicateHeaderNames(true); 1724 } 1725 1726 /** 1727 * Returns a new {@code CSVFormat} with duplicate header names behavior set to the given value. 1728 * 1729 * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow. 1730 * @return a new {@code CSVFormat} with duplicate header names behavior set to the given value. 1731 * @since 1.7 1732 */ 1733 public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) { 1734 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1735 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1736 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 1737 allowDuplicateHeaderNames); 1738 } 1739 1740 /** 1741 * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true} 1742 * 1743 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. 1744 * @see #withAllowMissingColumnNames(boolean) 1745 * @since 1.1 1746 */ 1747 public CSVFormat withAllowMissingColumnNames() { 1748 return this.withAllowMissingColumnNames(true); 1749 } 1750 1751 /** 1752 * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to the given value. 1753 * 1754 * @param allowMissingColumnNames 1755 * the missing column names behavior, {@code true} to allow missing column names in the header line, 1756 * {@code false} to cause an {@link IllegalArgumentException} to be thrown. 1757 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior. 
1758 */ 1759 public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) { 1760 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1761 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1762 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 1763 allowDuplicateHeaderNames); 1764 } 1765 1766 /** 1767 * Returns a new {@code CSVFormat} with whether to flush on close. 1768 * 1769 * @param autoFlush 1770 * whether to flush on close. 1771 * 1772 * @return A new CSVFormat that is equal to this but with the specified autoFlush setting. 1773 * @since 1.6 1774 */ 1775 public CSVFormat withAutoFlush(final boolean autoFlush) { 1776 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1777 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1778 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 1779 allowDuplicateHeaderNames); 1780 } 1781 1782 /** 1783 * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character. 1784 * 1785 * Note that the comment start character is only recognized at the start of a line. 1786 * 1787 * @param commentMarker 1788 * the comment start marker 1789 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker 1790 * @throws IllegalArgumentException 1791 * thrown if the specified character is a line break 1792 */ 1793 public CSVFormat withCommentMarker(final char commentMarker) { 1794 return withCommentMarker(Character.valueOf(commentMarker)); 1795 } 1796 1797 /** 1798 * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character. 1799 * 1800 * Note that the comment start character is only recognized at the start of a line. 
1801 * 1802 * @param commentMarker 1803 * the comment start marker, use {@code null} to disable 1804 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker 1805 * @throws IllegalArgumentException 1806 * thrown if the specified character is a line break 1807 */ 1808 public CSVFormat withCommentMarker(final Character commentMarker) { 1809 if (isLineBreak(commentMarker)) { 1810 throw new IllegalArgumentException("The comment start marker character cannot be a line break"); 1811 } 1812 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1813 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1814 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 1815 allowDuplicateHeaderNames); 1816 } 1817 1818 /** 1819 * Returns a new {@code CSVFormat} with the delimiter of the format set to the specified character. 1820 * 1821 * @param delimiter 1822 * the delimiter character 1823 * @return A new CSVFormat that is equal to this with the specified character as delimiter 1824 * @throws IllegalArgumentException 1825 * thrown if the specified character is a line break 1826 */ 1827 public CSVFormat withDelimiter(final char delimiter) { 1828 if (isLineBreak(delimiter)) { 1829 throw new IllegalArgumentException("The delimiter cannot be a line break"); 1830 } 1831 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 1832 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 1833 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 1834 allowDuplicateHeaderNames); 1835 } 1836 1837 /** 1838 * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character. 
1839 * 1840 * @param escape 1841 * the escape character 1842 * @return A new CSVFormat that is equal to his but with the specified character as the escape character 1843 * @throws IllegalArgumentException 1844 * thrown if the specified character is a line break 1845 */ 1846 public CSVFormat withEscape(final char escape) { 1847 return withEscape(Character.valueOf(escape)); 1848 } 1849 1850 /** 1851 * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character. 1852 * 1853 * @param escape 1854 * the escape character, use {@code null} to disable 1855 * @return A new CSVFormat that is equal to this but with the specified character as the escape character 1856 * @throws IllegalArgumentException 1857 * thrown if the specified character is a line break 1858 */ 1859 public CSVFormat withEscape(final Character escape) { 1860 if (isLineBreak(escape)) { 1861 throw new IllegalArgumentException("The escape character cannot be a line break"); 1862 } 1863 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces, 1864 ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord, 1865 allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 1866 allowDuplicateHeaderNames); 1867 } 1868 1869 /** 1870 * Returns a new {@code CSVFormat} using the first record as header. 1871 * 1872 * <p> 1873 * Calling this method is equivalent to calling: 1874 * </p> 1875 * 1876 * <pre> 1877 * CSVFormat format = aFormat.withHeader().withSkipHeaderRecord(); 1878 * </pre> 1879 * 1880 * @return A new CSVFormat that is equal to this but using the first record as header. 1881 * @see #withSkipHeaderRecord(boolean) 1882 * @see #withHeader(String...) 
1883 * @since 1.3 1884 */ 1885 public CSVFormat withFirstRecordAsHeader() { 1886 return withHeader().withSkipHeaderRecord(); 1887 } 1888 1889 /** 1890 * Returns a new {@code CSVFormat} with the header of the format defined by the enum class. 1891 * 1892 * <p> 1893 * Example: 1894 * </p> 1895 * 1896 * <pre> 1897 * public enum Header { 1898 * Name, Email, Phone 1899 * } 1900 * 1901 * CSVFormat format = aformat.withHeader(Header.class); 1902 * </pre> 1903 * <p> 1904 * The header is also used by the {@link CSVPrinter}. 1905 * </p> 1906 * 1907 * @param headerEnum 1908 * the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified 1909 * otherwise. 1910 * 1911 * @return A new CSVFormat that is equal to this but with the specified header 1912 * @see #withHeader(String...) 1913 * @see #withSkipHeaderRecord(boolean) 1914 * @since 1.3 1915 */ 1916 public CSVFormat withHeader(final Class<? extends Enum<?>> headerEnum) { 1917 String[] header = null; 1918 if (headerEnum != null) { 1919 final Enum<?>[] enumValues = headerEnum.getEnumConstants(); 1920 header = new String[enumValues.length]; 1921 for (int i = 0; i < enumValues.length; i++) { 1922 header[i] = enumValues[i].name(); 1923 } 1924 } 1925 return withHeader(header); 1926 } 1927 1928 /** 1929 * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can 1930 * either be parsed automatically from the input file with: 1931 * 1932 * <pre> 1933 * CSVFormat format = aformat.withHeader(); 1934 * </pre> 1935 * 1936 * or specified manually with: 1937 * 1938 * <pre> 1939 * CSVFormat format = aformat.withHeader(resultSet); 1940 * </pre> 1941 * <p> 1942 * The header is also used by the {@link CSVPrinter}. 1943 * </p> 1944 * 1945 * @param resultSet 1946 * the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified 1947 * otherwise. 
1948 * 1949 * @return A new CSVFormat that is equal to this but with the specified header 1950 * @throws SQLException 1951 * SQLException if a database access error occurs or this method is called on a closed result set. 1952 * @since 1.1 1953 */ 1954 public CSVFormat withHeader(final ResultSet resultSet) throws SQLException { 1955 return withHeader(resultSet != null ? resultSet.getMetaData() : null); 1956 } 1957 1958 /** 1959 * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can 1960 * either be parsed automatically from the input file with: 1961 * 1962 * <pre> 1963 * CSVFormat format = aformat.withHeader(); 1964 * </pre> 1965 * 1966 * or specified manually with: 1967 * 1968 * <pre> 1969 * CSVFormat format = aformat.withHeader(metaData); 1970 * </pre> 1971 * <p> 1972 * The header is also used by the {@link CSVPrinter}. 1973 * </p> 1974 * 1975 * @param metaData 1976 * the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified 1977 * otherwise. 1978 * 1979 * @return A new CSVFormat that is equal to this but with the specified header 1980 * @throws SQLException 1981 * SQLException if a database access error occurs or this method is called on a closed result set. 1982 * @since 1.1 1983 */ 1984 public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLException { 1985 String[] labels = null; 1986 if (metaData != null) { 1987 final int columnCount = metaData.getColumnCount(); 1988 labels = new String[columnCount]; 1989 for (int i = 0; i < columnCount; i++) { 1990 labels[i] = metaData.getColumnLabel(i + 1); 1991 } 1992 } 1993 return withHeader(labels); 1994 } 1995 1996 /** 1997 * Returns a new {@code CSVFormat} with the header of the format set to the given values. 
The header can either be 1998 * parsed automatically from the input file with: 1999 * 2000 * <pre> 2001 * CSVFormat format = aformat.withHeader(); 2002 * </pre> 2003 * 2004 * or specified manually with: 2005 * 2006 * <pre> 2007 * CSVFormat format = aformat.withHeader("name", "email", "phone"); 2008 * </pre> 2009 * <p> 2010 * The header is also used by the {@link CSVPrinter}. 2011 * </p> 2012 * 2013 * @param header 2014 * the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise. 2015 * 2016 * @return A new CSVFormat that is equal to this but with the specified header 2017 * @see #withSkipHeaderRecord(boolean) 2018 */ 2019 public CSVFormat withHeader(final String... header) { 2020 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2021 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2022 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2023 allowDuplicateHeaderNames); 2024 } 2025 2026 /** 2027 * Returns a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will 2028 * be printed first, before the headers. This setting is ignored by the parser. 2029 * 2030 * <pre> 2031 * CSVFormat format = aformat.withHeaderComments("Generated by Apache Commons CSV 1.1.", new Date()); 2032 * </pre> 2033 * 2034 * @param headerComments 2035 * the headerComments which will be printed by the Printer before the actual CSV data. 2036 * 2037 * @return A new CSVFormat that is equal to this but with the specified header 2038 * @see #withSkipHeaderRecord(boolean) 2039 * @since 1.1 2040 */ 2041 public CSVFormat withHeaderComments(final Object... 
headerComments) { 2042 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2043 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2044 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2045 allowDuplicateHeaderNames); 2046 } 2047 2048 /** 2049 * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}. 2050 * 2051 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. 2052 * @since {@link #withIgnoreEmptyLines(boolean)} 2053 * @since 1.1 2054 */ 2055 public CSVFormat withIgnoreEmptyLines() { 2056 return this.withIgnoreEmptyLines(true); 2057 } 2058 2059 /** 2060 * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value. 2061 * 2062 * @param ignoreEmptyLines 2063 * the empty line skipping behavior, {@code true} to ignore the empty lines between the records, 2064 * {@code false} to translate empty lines to empty records. 2065 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior. 2066 */ 2067 public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) { 2068 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2069 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2070 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2071 allowDuplicateHeaderNames); 2072 } 2073 2074 /** 2075 * Returns a new {@code CSVFormat} with the header ignore case behavior set to {@code true}. 2076 * 2077 * @return A new CSVFormat that will ignore case header name. 
2078 * @see #withIgnoreHeaderCase(boolean) 2079 * @since 1.3 2080 */ 2081 public CSVFormat withIgnoreHeaderCase() { 2082 return this.withIgnoreHeaderCase(true); 2083 } 2084 2085 /** 2086 * Returns a new {@code CSVFormat} with whether header names should be accessed ignoring case. 2087 * 2088 * @param ignoreHeaderCase 2089 * the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as 2090 * is. 2091 * @return A new CSVFormat that will ignore case header name if specified as {@code true} 2092 * @since 1.3 2093 */ 2094 public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) { 2095 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2096 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2097 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2098 allowDuplicateHeaderNames); 2099 } 2100 2101 /** 2102 * Returns a new {@code CSVFormat} with the parser trimming behavior of the format set to {@code true}. 2103 * 2104 * @return A new CSVFormat that is equal to this but with the specified parser trimming behavior. 2105 * @see #withIgnoreSurroundingSpaces(boolean) 2106 * @since 1.1 2107 */ 2108 public CSVFormat withIgnoreSurroundingSpaces() { 2109 return this.withIgnoreSurroundingSpaces(true); 2110 } 2111 2112 /** 2113 * Returns a new {@code CSVFormat} with the parser trimming behavior of the format set to the given value. 2114 * 2115 * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces, 2116 * {@code false} to leave the spaces as is. 2117 * @return A new CSVFormat that is equal to this but with the specified trimming behavior. 
2118 */ 2119 public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) { 2120 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2121 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2122 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2123 allowDuplicateHeaderNames); 2124 } 2125 2126 /** 2127 * Returns a new {@code CSVFormat} with conversions to and from null for strings on input and output. 2128 * <ul> 2129 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading 2130 * records.</li> 2131 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li> 2132 * </ul> 2133 * 2134 * @param nullString 2135 * the String to convert to and from {@code null}. No substitution occurs if {@code null} 2136 * 2137 * @return A new CSVFormat that is equal to this but with the specified null conversion string. 2138 */ 2139 public CSVFormat withNullString(final String nullString) { 2140 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2141 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2142 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2143 allowDuplicateHeaderNames); 2144 } 2145 2146 /** 2147 * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character. 
2148 * 2149 * @param quoteChar 2150 * the quoteChar character 2151 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar 2152 * @throws IllegalArgumentException 2153 * thrown if the specified character is a line break 2154 */ 2155 public CSVFormat withQuote(final char quoteChar) { 2156 return withQuote(Character.valueOf(quoteChar)); 2157 } 2158 2159 /** 2160 * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character. 2161 * 2162 * @param quoteChar 2163 * the quoteChar character, use {@code null} to disable 2164 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar 2165 * @throws IllegalArgumentException 2166 * thrown if the specified character is a line break 2167 */ 2168 public CSVFormat withQuote(final Character quoteChar) { 2169 if (isLineBreak(quoteChar)) { 2170 throw new IllegalArgumentException("The quoteChar cannot be a line break"); 2171 } 2172 return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces, 2173 ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord, 2174 allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2175 allowDuplicateHeaderNames); 2176 } 2177 2178 /** 2179 * Returns a new {@code CSVFormat} with the output quote policy of the format set to the specified value. 2180 * 2181 * @param quoteModePolicy 2182 * the quote policy to use for output. 
2183 * 2184 * @return A new CSVFormat that is equal to this but with the specified quote policy 2185 */ 2186 public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) { 2187 return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter, 2188 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2189 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2190 allowDuplicateHeaderNames); 2191 } 2192 2193 /** 2194 * Returns a new {@code CSVFormat} with the record separator of the format set to the specified character. 2195 * 2196 * <p> 2197 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently 2198 * only works for inputs with '\n', '\r' and "\r\n" 2199 * </p> 2200 * 2201 * @param recordSeparator 2202 * the record separator to use for output. 2203 * 2204 * @return A new CSVFormat that is equal to this but with the specified output record separator 2205 */ 2206 public CSVFormat withRecordSeparator(final char recordSeparator) { 2207 return withRecordSeparator(String.valueOf(recordSeparator)); 2208 } 2209 2210 /** 2211 * Returns a new {@code CSVFormat} with the record separator of the format set to the specified String. 2212 * 2213 * <p> 2214 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently 2215 * only works for inputs with '\n', '\r' and "\r\n" 2216 * </p> 2217 * 2218 * @param recordSeparator 2219 * the record separator to use for output. 
2220 * 2221 * @return A new CSVFormat that is equal to this but with the specified output record separator 2222 * @throws IllegalArgumentException 2223 * if recordSeparator is none of CR, LF or CRLF 2224 */ 2225 public CSVFormat withRecordSeparator(final String recordSeparator) { 2226 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2227 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2228 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2229 allowDuplicateHeaderNames); 2230 } 2231 2232 /** 2233 * Returns a new {@code CSVFormat} with skipping the header record set to {@code true}. 2234 * 2235 * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. 2236 * @see #withSkipHeaderRecord(boolean) 2237 * @see #withHeader(String...) 2238 * @since 1.1 2239 */ 2240 public CSVFormat withSkipHeaderRecord() { 2241 return this.withSkipHeaderRecord(true); 2242 } 2243 2244 /** 2245 * Returns a new {@code CSVFormat} with whether to skip the header record. 2246 * 2247 * @param skipHeaderRecord 2248 * whether to skip the header record. 2249 * 2250 * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting. 2251 * @see #withHeader(String...) 2252 */ 2253 public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) { 2254 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2255 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2256 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2257 allowDuplicateHeaderNames); 2258 } 2259 2260 /** 2261 * Returns a new {@code CSVFormat} with the record separator of the format set to the operating system's line 2262 * separator string, typically CR+LF on Windows and LF on Linux. 
2263 * 2264 * <p> 2265 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently 2266 * only works for inputs with '\n', '\r' and "\r\n" 2267 * </p> 2268 * 2269 * @return A new CSVFormat that is equal to this but with the operating system's line separator string. 2270 * @since 1.6 2271 */ 2272 public CSVFormat withSystemRecordSeparator() { 2273 return withRecordSeparator(System.getProperty("line.separator")); 2274 } 2275 2276 /** 2277 * Returns a new {@code CSVFormat} to add a trailing delimiter. 2278 * 2279 * @return A new CSVFormat that is equal to this but with the trailing delimiter setting. 2280 * @since 1.3 2281 */ 2282 public CSVFormat withTrailingDelimiter() { 2283 return withTrailingDelimiter(true); 2284 } 2285 2286 /** 2287 * Returns a new {@code CSVFormat} with whether to add a trailing delimiter. 2288 * 2289 * @param trailingDelimiter 2290 * whether to add a trailing delimiter. 2291 * 2292 * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting. 2293 * @since 1.3 2294 */ 2295 public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) { 2296 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2297 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2298 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2299 allowDuplicateHeaderNames); 2300 } 2301 2302 /** 2303 * Returns a new {@code CSVFormat} to trim leading and trailing blanks. 2304 * See {@link #getTrim()} for details of where this is used. 2305 * 2306 * @return A new CSVFormat that is equal to this but with the trim setting on. 2307 * @since 1.3 2308 */ 2309 public CSVFormat withTrim() { 2310 return withTrim(true); 2311 } 2312 2313 /** 2314 * Returns a new {@code CSVFormat} with whether to trim leading and trailing blanks. 
2315 * See {@link #getTrim()} for details of where this is used. 2316 * 2317 * @param trim 2318 * whether to trim leading and trailing blanks. 2319 * 2320 * @return A new CSVFormat that is equal to this but with the specified trim setting. 2321 * @since 1.3 2322 */ 2323 public CSVFormat withTrim(final boolean trim) { 2324 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter, 2325 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header, 2326 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush, 2327 allowDuplicateHeaderNames); 2328 } 2329}