This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
input {
  # Tail a local Apache access log. start_position => "beginning" makes
  # Logstash read the file from its first line on first sight (instead of
  # only new lines appended after startup), so existing entries are ingested.
  # NOTE(review): "debug" on the file input is a legacy 1.x option; it only
  # adds plugin debug logging and can be dropped once the pipeline works.
  file {
    debug => true
    path => ["/Users/gpzpati/Downloads/apache.log"]
    start_position => "beginning"
  }
}
filter {
  # Parse each raw Apache combined-format line into named fields
  # (clientip, timestamp, request, response, agent, ...).
  grok {
    # See the following URL for a complete list of named patterns
    # logstash/grok ships with by default:
    # https://github.com/logstash/logstash/tree/master/patterns
    #
    # "match" is the supported form; the bare "pattern" option is deprecated.
    # On a successful match every captured value becomes a field on the event.
    match => [ "message", "%{COMBINEDAPACHELOG}" ]
  }
  date {
    # Set @timestamp from the 'timestamp' field grok extracted above.
    # The Apache time format looks like: "18/Aug/2011:05:44:34 -0700"
    #
    # BUG FIX: the original carried `type => "apache"`, but the file input
    # never assigns a type, so this filter was silently skipped and
    # @timestamp stayed at ingest time. The guard is removed so the date
    # actually gets parsed.
    match => [ "timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
  }
  # Enrich the event with geo data looked up from the client IP, and tag
  # the event so the mutate stages below only run on enriched events.
  geoip {
    database => "/Users/gpzpati/Downloads/GeoLiteCity.dat"
    add_tag => [ "geoip" ]
    source => "clientip"
  }
  mutate {
    # Only events the geoip filter tagged above.
    tags => [ "geoip" ]
    # 'coords' will be kept, 'tmplat' is temporary.
    # Both of these new fields are strings at this point.
    add_field => [ "coords", "%{geoip.longitude}",
                   "tmplat", "%{geoip.latitude}" ]
  }
  mutate {
    tags => [ "geoip" ]
    # Merge 'tmplat' into 'coords', producing a [lon, lat] array — the
    # order GeoJSON / Elasticsearch geo_point arrays expect.
    merge => [ "coords", "tmplat" ]
  }
  mutate {
    tags => [ "geoip" ]
    # Convert our new array of strings to floats so Elasticsearch can map
    # it as a geo_point.
    convert => [ "coords", "float" ]
    # Delete our temporary latitude field.
    remove => [ "tmplat" ]
  }
}
output {
  # Pretty-print every event to the console for debugging; remove once the
  # pipeline is verified, since it duplicates all output.
  stdout {
    codec => rubydebug
  }
  # Index events into the local Elasticsearch node.
  elasticsearch {
    host => "127.0.0.1"
  }
}
java -jar logstash-1.3.2-flatjar.jar agent -f httpaccess.conf
Once the Logstash server was started, I could see how it parsed logs and posted them to Elasticsearch. For example, for the following log statement:
129.143.71.36 - - [31/Aug/2011:08:35:17 -0700] "GET /favicon.ico HTTP/1.1" 200 3935 "-" "Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.224 Safari/534.10"
I could see Logstash converting it into the following JSON before posting it to Elasticsearch:
1 comment:
Hi Sunil, I actually followed your documentation for MapReduce and did the same as you. The only difference is that I am using the GeoIP2-City.mmdb file and "DatabaseReader reader = new DatabaseReader.Builder(cityFile).withCache(new CHMCache()).build();" — and I am getting a Jackson bind exception on AWS EMR.
Post a Comment