This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# echo.py: companion script for the Spark RDD.pipe() example below.
# Reads every line from stdin (Spark feeds each RDD line here) and
# echoes it back to stdout prefixed with "Python ". Spark collects
# the stdout lines as the elements of the resulting RDD.
import sys

for line in sys.stdin:
    # `line` keeps its trailing newline, so write() (not print) avoids
    # doubling newlines in the piped output.
    sys.stdout.write("Python " + line)
lines.pipe("python echo.py")
This tells Spark to pass every line of the RDD to the standard input of `python echo.py` and to collect the process's standard output as a new RDD. Nothing here is specific to Python — you could use any executable that reads lines from stdin and writes lines to stdout.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.spnotes | |
import org.apache.spark.{SparkContext, SparkConf} | |
/** Demonstrates `RDD.pipe`: streams every line of an input text file
  * through an external process (`python echo.py`) and prints both the
  * original lines and the lines the external process wrote to stdout.
  *
  * Usage: ScalaPython <inputFileName>
  */
object ScalaPython {
  def main(argv: Array[String]): Unit = {
    System.out.println("Entering ScalaPython.main")
    if (argv.length != 1) {
      println("Please provide 1 parameter <inputFileName>")
      System.exit(1)
    }
    val fileName = argv(0)

    val sparkConf = new SparkConf().setAppName("ScalaPython").setMaster("local")
    val sparkContext = new SparkContext(sparkConf)
    try {
      val lines = sparkContext.textFile(fileName)

      println("Print lines from original file")
      // foreach runs on executors; the output is visible on this console
      // only because the master is "local".
      lines.foreach(println)

      // pipe() feeds each partition's lines to the command's stdin and
      // returns the command's stdout lines as a new RDD. echo.py must be
      // shipped to the executors (see the --files flag in spark-submit).
      val lines1 = lines.pipe("python echo.py")
      println("Print lines returned from python")
      lines1.foreach(println)
    } finally {
      // Always release the SparkContext, even if reading or piping fails.
      sparkContext.stop()
    }
    System.out.println("Exiting ScalaPython.main")
  }
}
bin/spark-submit \
  --files echo.py \
  ScalaPython-1.0-SNAPSHOT-jar-with-dependencies.jar helloworld.txt
3 comments:
Is there any example of doing the same with a DataFrame?
I like your post very much. It is nice useful for my research. I wish for you to share more info about this. Keep blogging Apache Kafka Training in Electronic City
Thanks Sunil, Really Helpful!!
Post a Comment