
Feature/2108 - csv parser #4439

Merged
merged 27 commits on Aug 24, 2018
Changes from 22 commits
27 commits
a6c1e2b
unfinished csv parser
maxunt Jul 17, 2018
c839ce3
functionality for csv parser, still needs unit tests
maxunt Jul 18, 2018
3c8cb17
add unit tests for csv parser
maxunt Jul 19, 2018
4a07734
mess with config options
maxunt Jul 19, 2018
48210f5
fix unit tests
maxunt Jul 19, 2018
67f4929
change README
maxunt Jul 19, 2018
d24e687
unfinished test case for csv
maxunt Jul 25, 2018
e07ed58
fix type conversion and add unit test
maxunt Jul 25, 2018
edd8afc
addresses greg and chris's comments
maxunt Jul 27, 2018
b5ff78f
address some of greg+chris's comments. includes config for trimspace …
maxunt Jul 27, 2018
7704f3e
get rid of grok changes on branch
maxunt Jul 27, 2018
83db721
Merge branch 'master' into feature/2108
maxunt Aug 17, 2018
80135ee
initial config changes
maxunt Aug 20, 2018
339670f
Merge branch 'master' into feature/2108
maxunt Aug 20, 2018
60761d7
additional config options
maxunt Aug 20, 2018
24e38f3
start to remove field_column config
maxunt Aug 20, 2018
fc36fd5
just broke a lot. lovely
maxunt Aug 21, 2018
6e7ec3e
fixed it
maxunt Aug 22, 2018
0d7b236
address some of daniel's comments
maxunt Aug 22, 2018
20ed819
trim space manually
maxunt Aug 22, 2018
5016899
fix config
maxunt Aug 22, 2018
162b092
Merge branch 'master' into feature/2108
maxunt Aug 22, 2018
86d353f
Merge branch 'master' into feature/2108
maxunt Aug 23, 2018
c058db6
Remerge data format docs
danielnelson Aug 23, 2018
4847a59
finally fixes hopefully. error checks in registry.go
maxunt Aug 24, 2018
b408ac4
tags are added before default tags
maxunt Aug 24, 2018
acc5ea7
fix tags being removed from fields
maxunt Aug 24, 2018
162 changes: 129 additions & 33 deletions docs/DATA_FORMATS_INPUT.md
@@ -2,6 +2,17 @@

Telegraf is able to parse the following input data formats into metrics:

<<<<<<< HEAD
Contributor:

This file needs to be fixed up due to merge issues. I just merged your updates to the JSON parser, so you may need to update again too.

Contributor (Author):

I'm not sure I correctly followed your new format for the DATA_FORMATS_INPUT file when I resolved the merge conflict; could you take a look at that? I think the CSV entry is missing the proper link to its section.

1. [InfluxDB Line Protocol](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#influx)
1. [JSON](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#json)
1. [Graphite](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#graphite)
1. [Value](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#value), ie: 45 or "booyah"
1. [Nagios](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#nagios) (exec input only)
1. [Collectd](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#collectd)
1. [Dropwizard](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#dropwizard)
1. [Grok](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#grok)
1. [CSV](https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md#csv)
=======
1. [InfluxDB Line Protocol](#influx)
1. [JSON](#json)
1. [Graphite](#graphite)
@@ -12,6 +23,7 @@ Telegraf is able to parse the following input data formats into metrics:
1. [Grok](#grok)
1. [Logfmt](#logfmt)
1. [Wavefront](#wavefront)
>>>>>>> master

Telegraf metrics, like InfluxDB
[points](https://docs.influxdata.com/influxdb/v0.10/write_protocols/line/),
@@ -671,6 +683,50 @@ The best way to get acquainted with grok patterns is to read the logstash docs,
which are available here:
https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html

#### Grok Configuration:
```toml
[[inputs.file]]
## Files to parse each interval.
## These accept standard unix glob matching rules, but with the addition of
## ** as a "super asterisk". ie:
## /var/log/**.log -> recursively find all .log files in /var/log
## /var/log/*/*.log -> find all .log files with a parent dir in /var/log
## /var/log/apache.log -> only tail the apache log file
files = ["/var/log/apache/access.log"]

## The dataformat to be read from files
## Each data format has its own unique set of configuration options, read
## more about them here:
## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
data_format = "grok"

## This is a list of patterns to check the given log file(s) for.
## Note that adding patterns here increases processing time. The most
## efficient configuration is to have one pattern.
## Other common built-in patterns are:
## %{COMMON_LOG_FORMAT} (plain apache & nginx access logs)
## %{COMBINED_LOG_FORMAT} (access logs + referrer & agent)
grok_patterns = ["%{COMBINED_LOG_FORMAT}"]

## Full path(s) to custom pattern files.
grok_custom_pattern_files = []

## Custom patterns can also be defined here. Put one pattern per line.
grok_custom_patterns = '''
'''

## Timezone allows you to provide an override for timestamps that
## don't already include an offset
## e.g. 04/06/2016 12:41:45 data one two 5.43µs
##
## Default: "" which renders UTC
## Options are as follows:
## 1. Local -- interpret based on machine localtime
## 2. "Canada/Eastern" -- Unix TZ values like those found in https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
## 3. UTC -- or blank/unspecified, will return timestamp in UTC
grok_timezone = "Canada/Eastern"
```

The grok parser uses a slightly modified version of logstash "grok"
patterns, with the format:

@@ -821,46 +877,86 @@ will be processed based on the current machine timezone configuration. Lastly, if a
timezone from the list of Unix [timezones](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones) is provided,
grok will offset the timestamp accordingly.

#### TOML Escaping
# CSV
Parse out metrics from a CSV-formatted table. By default, the parser assumes there is no header and
will read data from the first line. If `csv_header_row_count` is set to anything other than 0, the parser
will extract column names from that many rows. Headers spanning more than one row will have their
names concatenated together. Any unnamed columns will be ignored by the parser.
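
For illustration only (hypothetical input, not taken from this PR): with `csv_header_row_count = 2`, the two header rows below would be concatenated into the column names `cpu_usage` and `mem_free`:

```
cpu,mem
_usage,_free
0.8,2048
```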

When saving patterns to the configuration file, keep in mind the different TOML
[string](https://github.com/toml-lang/toml#string) types and the escaping
rules for each. These escaping rules must be applied in addition to the
escaping required by the grok syntax. Using the Multi-line line literal
syntax with `'''` may be useful.
The `csv_skip_rows` config indicates the number of rows to skip before looking for header information or data
to parse. By default, no rows will be skipped.

The following config examples will parse this input file:
The `csv_skip_columns` config indicates the number of columns to skip before parsing data. These
columns will not be read out of the header either. Names assigned with `csv_column_names` will begin at the first
parsed column after the skipped columns. By default, no columns are skipped.
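
As a sketch of how the skip options can be combined (the input described here is hypothetical), a file whose first line is a title and whose first column is a row index could be handled with:

```toml
csv_skip_rows = 1      ## skip the title line before reading the header
csv_skip_columns = 1   ## ignore the first column in the header and in every data row
```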

```
|42|\uD83D\uDC2F|'telegraf'|
```
To assign custom column names, the `csv_column_names` config is available. If `csv_column_names`
is used, all columns must be named, since any additional columns will be ignored. If `csv_header_row_count`
is set to 0, `csv_column_names` must be specified. Names listed in `csv_column_names` will override names extracted
from the header.

Since `|` is a special character in the grok language, we must escape it to
get a literal `|`. With a basic TOML string, special characters such as
backslash must be escaped, requiring us to escape the backslash a second time.
The `csv_tag_columns` and `csv_field_columns` configs are available to add column data to the metric as tags or fields.
The name used to refer to a column is the name in the header or, if specified, the corresponding
name assigned in `csv_column_names`. If neither config is specified, no data will be added to the metric.
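
As a minimal sketch, assuming a header of `host,usage_idle`, a data row of `server01,95.2`, and the behavior described in the configuration sample below (columns not listed as tags become fields), `csv_tag_columns = ["host"]` would produce roughly the following metric (the measurement name shown is arbitrary):

```
csv,host=server01 usage_idle=95.2
```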

```toml
[[inputs.file]]
grok_patterns = ["\\|%{NUMBER:value:int}\\|%{UNICODE_ESCAPE:escape}\\|'%{WORD:name}'\\|"]
grok_custom_patterns = "UNICODE_ESCAPE (?:\\\\u[0-9A-F]{4})+"
```
Additional configs are available to dynamically name metrics and set custom timestamps. If the
`csv_measurement_column` config is specified, the parser will take the metric name from the value found
in that column. If `csv_timestamp_column` is specified, the parser will extract the timestamp from
that column; in that case, `csv_timestamp_format` must also be specified,
or an error will be returned.
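
A hedged example of these options, assuming columns named `measurement` and `time` that hold RFC3339 timestamps (the column names are hypothetical):

```toml
csv_measurement_column = "measurement"
csv_timestamp_column = "time"
## Go reference-time layout for RFC3339
csv_timestamp_format = "2006-01-02T15:04:05Z07:00"
```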

We cannot use a literal TOML string for the pattern, because we cannot match a
`'` within it. However, it works well for the custom pattern.
#### CSV Configuration
```toml
[[inputs.file]]
grok_patterns = ["\\|%{NUMBER:value:int}\\|%{UNICODE_ESCAPE:escape}\\|'%{WORD:name}'\\|"]
grok_custom_patterns = 'UNICODE_ESCAPE (?:\\u[0-9A-F]{4})+'
```

A multi-line literal string allows us to encode the pattern:
```toml
[[inputs.file]]
grok_patterns = ['''
\|%{NUMBER:value:int}\|%{UNICODE_ESCAPE:escape}\|'%{WORD:name}'\|
''']
grok_custom_patterns = 'UNICODE_ESCAPE (?:\\u[0-9A-F]{4})+'
```
data_format = "csv"

## Indicates how many rows to treat as a header. By default, the parser assumes
## there is no header and will parse the first row as data. If set to more
## than 1, column names will be concatenated with the names listed in the following header rows.
## If `csv_column_names` is specified, the column names in the header will be overridden.
# csv_header_row_count = 0

## Indicates the number of rows to skip before looking for header information.
# csv_skip_rows = 0

## Indicates the number of columns to skip before looking for data to parse.
## These columns will be skipped in the header as well.
# csv_skip_columns = 0

## The separator between CSV fields
## By default, the parser assumes a comma (",")
# csv_delimiter = ","

## The character reserved for marking a row as a comment row
## Commented rows are skipped and not parsed
# csv_comment = ""

## If set to true, the parser will remove leading whitespace from fields
## By default, this is false
# csv_trim_space = false

## For assigning custom names to columns
## If this is specified, all columns should have a name
## Unnamed columns will be ignored by the parser.
## If `csv_header_row_count` is set to 0, this config must be used
csv_column_names = []

## Columns listed here will be added as tags. Any other columns
## will be added as fields.
csv_tag_columns = []

## The column to extract the name of the metric from
## By default, this is the name of the plugin
## the `name_override` config overrides this
# csv_measurement_column = ""

## The column from which to extract the metric's timestamp
## `csv_timestamp_format` must be specified if this is used
# csv_timestamp_column = ""

## The format of time data extracted from `csv_timestamp_column`
## This must be specified if `csv_timestamp_column` is specified
# csv_timestamp_format = ""
```
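
As an end-to-end sketch (the file path, column names, and values are hypothetical), a config such as

```toml
[[inputs.file]]
  files = ["/tmp/metrics.csv"]
  data_format = "csv"
  csv_header_row_count = 1
  csv_tag_columns = ["host"]
  csv_timestamp_column = "time"
  csv_timestamp_format = "2006-01-02T15:04:05Z07:00"
```

applied to an input like

```
time,host,usage_idle
2018-08-24T10:00:00Z,server01,95.2
```

would yield roughly one metric per data row, with `host` as a tag, `usage_idle` as a field, and the timestamp taken from the `time` column.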

#### Tips for creating patterns

122 changes: 122 additions & 0 deletions internal/config/config.go
@@ -1399,6 +1399,120 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) {
}
}

//for csv parser
if node, ok := tbl.Fields["csv_column_names"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
c.CSVColumnNames = append(c.CSVColumnNames, str.Value)
}
}
}
}
}

if node, ok := tbl.Fields["csv_tag_columns"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if ary, ok := kv.Value.(*ast.Array); ok {
for _, elem := range ary.Value {
if str, ok := elem.(*ast.String); ok {
c.CSVTagColumns = append(c.CSVTagColumns, str.Value)
}
}
}
}
}

if node, ok := tbl.Fields["csv_delimiter"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
c.CSVDelimiter = str.Value
}
}
}

if node, ok := tbl.Fields["csv_comment"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
c.CSVComment = str.Value
}
}
}

if node, ok := tbl.Fields["csv_measurement_column"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
c.CSVMeasurementColumn = str.Value
}
}
}

if node, ok := tbl.Fields["csv_timestamp_column"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
c.CSVTimestampColumn = str.Value
}
}
}

if node, ok := tbl.Fields["csv_timestamp_format"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
c.CSVTimestampFormat = str.Value
}
}
}

if node, ok := tbl.Fields["csv_header_row_count"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
iVal, err := strconv.Atoi(str.Value)
c.CSVHeaderRowCount = iVal
if err != nil {
return nil, fmt.Errorf("E! parsing to int: %v", err)
}
}
}
}

if node, ok := tbl.Fields["csv_skip_rows"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
iVal, err := strconv.Atoi(str.Value)
c.CSVSkipRows = iVal
if err != nil {
return nil, fmt.Errorf("E! parsing to int: %v", err)
}
}
}
}

if node, ok := tbl.Fields["csv_skip_columns"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.String); ok {
iVal, err := strconv.Atoi(str.Value)
c.CSVSkipColumns = iVal
if err != nil {
return nil, fmt.Errorf("E! parsing to int: %v", err)
}
}
}
}

if node, ok := tbl.Fields["csv_trim_space"]; ok {
if kv, ok := node.(*ast.KeyValue); ok {
if str, ok := kv.Value.(*ast.Boolean); ok {
//for config with no quotes
val, err := strconv.ParseBool(str.Value)
c.CSVTrimSpace = val
if err != nil {
return nil, fmt.Errorf("E! parsing to bool: %v", err)
}
}
}
}

c.MetricName = name

delete(tbl.Fields, "data_format")
@@ -1420,6 +1534,14 @@ func buildParser(name string, tbl *ast.Table) (parsers.Parser, error) {
delete(tbl.Fields, "grok_custom_patterns")
delete(tbl.Fields, "grok_custom_pattern_files")
delete(tbl.Fields, "grok_timezone")
delete(tbl.Fields, "csv_data_columns")
delete(tbl.Fields, "csv_tag_columns")
delete(tbl.Fields, "csv_field_columns")
delete(tbl.Fields, "csv_name_column")
delete(tbl.Fields, "csv_timestamp_column")
delete(tbl.Fields, "csv_timestamp_format")
delete(tbl.Fields, "csv_delimiter")
delete(tbl.Fields, "csv_header")

return parsers.NewParser(c)
}