spark-1.6.1-pushdown-0004.patch
diff --git a/core/pom.xml b/core/pom.xml
index 1399103..14e504c 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -267,6 +267,28 @@
<artifactId>oro</artifactId>
<version>${oro.version}</version>
</dependency>
+ <!--dependency>
+ <groupId>com.ibm.swift</groupId>
+ <artifactId>swift-driver</artifactId>
+ </dependency-->
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-openstack</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>javax.servlet</groupId>
+ <artifactId>servlet-api</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.codehaus.jackson</groupId>
+ <artifactId>jackson-mapper-asl</artifactId>
+ </exclusion>
+ <exclusion>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
<dependency>
<groupId>org.tachyonproject</groupId>
<artifactId>tachyon-client</artifactId>
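
Note: the hunk above adds hadoop-openstack to core, which supplies the Swift filesystem connector (swift:// paths) that the pushdown hooks further down target; the exclusions keep its servlet-api, jackson-mapper-asl, and mockito-all transitives from clashing with the versions Spark already ships. A minimal sketch of the Swift client configuration a job would pair with this dependency — the service nickname "mycloud" and the credentials are placeholders, not part of this patch:

    // Sketch only: standard hadoop-openstack settings for a Keystone-backed
    // Swift service nicknamed "mycloud"; substitute a real endpoint and
    // real credentials.
    val hc = sc.hadoopConfiguration
    hc.set("fs.swift.impl", "org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem")
    hc.set("fs.swift.service.mycloud.auth.url", "https://keystone.example.com/v2.0/tokens")
    hc.set("fs.swift.service.mycloud.username", "user")
    hc.set("fs.swift.service.mycloud.password", "secret")
    hc.set("fs.swift.service.mycloud.tenant", "tenant")
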
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
index f37c95b..c6b0e42 100644
--- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -138,6 +138,20 @@ class HadoopRDD[K, V](
private val createTime = new Date()
private val shouldCloneJobConf = sparkContext.conf.getBoolean("spark.hadoop.cloneConf", false)
+ private val sqlPushdownEnabled = sparkContext.conf.getBoolean("spark.pushdown", false)
+
+ // YM Start
+ final val NotINITIALIZED = "NotInitialized"
+ private var thePredicate = NotINITIALIZED
+ private var theProjection = NotINITIALIZED
+ // YM End
+
+ override def setPredicate(projection: String, predicate: String): String = {
+ logWarning("set predicate: projection=" + projection +", predicate=" + predicate + " sqlPushdownEnabled = " + sqlPushdownEnabled)
+ thePredicate = predicate
+ theProjection = projection
+ ""
+ }
// Returns a JobConf that will be used on slaves to obtain input splits for Hadoop reads.
protected def getJobConf(): JobConf = {
@@ -205,12 +219,29 @@ class HadoopRDD[K, V](
}
override def compute(theSplit: Partition, context: TaskContext): InterruptibleIterator[(K, V)] = {
+ logWarning("compute thePredicate = " + thePredicate + " theProjection = " + theProjection)
val iter = new NextIterator[(K, V)] {
val split = theSplit.asInstanceOf[HadoopPartition]
logInfo("Input split: " + split.inputSplit)
val jobConf = getJobConf()
+ if (!NotINITIALIZED.equals(thePredicate)) {
+ logWarning("Setting jobConf swift.filter.predicate with thePredicate = " + thePredicate)
+ jobConf.set("swift.filter.predicate", thePredicate)
+ } else {
+ logWarning("thePredicate is not initialized !!!")
+ jobConf.set("swift.filter.predicate", "")
+ }
+
+ if (!NotINITIALIZED.equals(theProjection)) {
+ logWarning("Setting jobConf swift.filter.projection with theProjection = " + theProjection)
+ jobConf.set("swift.filter.columns", theProjection)
+ } else {
+ logWarning("theProjection is NotInitialized !!!")
+ jobConf.set("swift.filter.columns", "")
+ }
+
val inputMetrics = context.taskMetrics.getInputMetricsForReadMethod(DataReadMethod.Hadoop)
// Sets the thread local variable for the file's name
@@ -251,6 +282,10 @@ class HadoopRDD[K, V](
if (!finished) {
inputMetrics.incRecordsRead(1)
}
+ // else {
+ // logWarning("YYYEEE " + theSplit.index + " FINISHED for me ")
+ // }
+ // logWarning("YYYEEE " + theSplit.index + " next record = " + key + " value=" + value)
(key, value)
}
@@ -424,4 +459,5 @@ private[spark] object HadoopRDD extends Logging {
}}
out.seq
}
+
}
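
Note: in the hunks above, compute() copies whatever setPredicate() stored into the per-task JobConf under the keys swift.filter.predicate and swift.filter.columns, writing an empty string when nothing was pushed down, so a Swift-aware input path can filter rows and project columns before records ever reach Spark. A hedged sketch, not part of the patch, of how a reader on the storage side might consume those keys:

    // Illustrative consumer of the hints compute() seeds into the JobConf;
    // the empty string is the patch's sentinel for "nothing pushed down".
    import org.apache.hadoop.mapred.JobConf

    def pushdownHints(conf: JobConf): (Option[String], Option[String]) = {
      def opt(key: String) = Option(conf.get(key, "")).filter(_.nonEmpty)
      (opt("swift.filter.columns"), opt("swift.filter.predicate"))
    }
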
diff --git a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala
index e4587c9..61ea2ed 100644
--- a/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala
@@ -34,6 +34,10 @@ private[spark] class MapPartitionsRDD[U: ClassTag, T: ClassTag](
override def getPartitions: Array[Partition] = firstParent[T].partitions
+ override def setPredicate(projection: String, predicate: String): String = {
+ firstParent[T].setPredicate(projection, predicate)
+ ""
+ }
override def compute(split: Partition, context: TaskContext): Iterator[U] =
f(context, split.index, firstParent[T].iterator(split, context))
diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 2fb8047..803b5aa 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -126,6 +126,8 @@ abstract class RDD[T: ClassTag](
*/
protected def getPreferredLocations(split: Partition): Seq[String] = Nil
+ def setPredicate(projection: String, predicate: String): String = ""
+
/** Optionally overridden by subclasses to specify how they are partitioned. */
@transient val partitioner: Option[Partitioner] = None
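
Note: the three Scala changes form a delegation chain: RDD.setPredicate is a no-op default returning "", MapPartitionsRDD forwards the call to firstParent, and HadoopRDD (above) stores the strings for compute(). A hedged usage sketch — the path, column list, and predicate syntax are illustrative, not defined by this patch:

    // sc.textFile returns a MapPartitionsRDD over a HadoopRDD, so this call
    // walks the lineage down to the HadoopRDD that reads from Swift.
    val lines = sc.textFile("swift://container.mycloud/data.csv")
    lines.setPredicate("col1,col3", "col2 > 100") // (projection, predicate)
    lines.count() // compute() now seeds the JobConf with both hints
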
diff --git a/pom.xml b/pom.xml
index b1a5d28..35d9abd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1,20 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
- ~ Licensed to the Apache Software Foundation (ASF) under one or more
- ~ contributor license agreements. See the NOTICE file distributed with
- ~ this work for additional information regarding copyright ownership.
- ~ The ASF licenses this file to You under the Apache License, Version 2.0
- ~ (the "License"); you may not use this file except in compliance with
- ~ the License. You may obtain a copy of the License at
- ~
- ~ http://www.apache.org/licenses/LICENSE-2.0
- ~
- ~ Unless required by applicable law or agreed to in writing, software
- ~ distributed under the License is distributed on an "AS IS" BASIS,
- ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- ~ See the License for the specific language governing permissions and
- ~ limitations under the License.
- -->
+~ Licensed to the Apache Software Foundation (ASF) under one or more
+~ contributor license agreements. See the NOTICE file distributed with
+~ this work for additional information regarding copyright ownership.
+~ The ASF licenses this file to You under the Apache License, Version 2.0
+~ (the "License"); you may not use this file except in compliance with
+~ the License. You may obtain a copy of the License at
+~
+~ http://www.apache.org/licenses/LICENSE-2.0
+~
+~ Unless required by applicable law or agreed to in writing, software
+~ distributed under the License is distributed on an "AS IS" BASIS,
+~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+~ See the License for the specific language governing permissions and
+~ limitations under the License.
+-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
@@ -189,12 +189,12 @@
<test.exclude.tags></test.exclude.tags>
<!--
- Dependency scopes that can be overridden by enabling certain profiles. These profiles are
- declared in the projects that build assemblies.
+ Dependency scopes that can be overridden by enabling certain profiles. These profiles are
+ declared in the projects that build assemblies.
- For other projects the scope should remain as "compile", otherwise they are not available
- during compilation if the dependency is transivite (e.g. "bagel/" depending on "core/" and
- needing Hadoop classes in the classpath to compile).
+For other projects the scope should remain as "compile", otherwise they are not available
+during compilation if the dependency is transitive (e.g. "bagel/" depending on "core/" and
+needing Hadoop classes in the classpath to compile).
-->
<flume.deps.scope>compile</flume.deps.scope>
<hadoop.deps.scope>compile</hadoop.deps.scope>
@@ -204,8 +204,8 @@
<parquet.test.deps.scope>test</parquet.test.deps.scope>
<!--
- Overridable test home. So that you can call individual pom files directly without
- things breaking.
+ Overridable test home. So that you can call individual pom files directly without
+ things breaking.
-->
<spark.test.home>${session.executionRootDirectory}</spark.test.home>
@@ -328,8 +328,8 @@
</pluginRepositories>
<dependencies>
<!--
- This is a dummy dependency that is used along with the shading plug-in
- to create effective poms on publishing (see SPARK-3812).
+ This is a dummy dependency that is used along with the shading plug-in
+ to create effective poms on publishing (see SPARK-3812).
-->
<dependency>
<groupId>org.spark-project.spark</groupId>
@@ -337,8 +337,8 @@
<version>1.0.0</version>
</dependency>
<!--
- This is needed by the scalatest plugin, and so is declared here to be available in
- all child modules, just as scalatest is run in all children
+ This is needed by the scalatest plugin, and so is declared here to be available in
+ all child modules, just as scalatest is run in all children
-->
<dependency>
<groupId>org.scalatest</groupId>
@@ -912,6 +912,23 @@
</exclusions>
</dependency>
<dependency>
+ <groupId>com.databricks</groupId>
+ <artifactId>spark-csv_2.10</artifactId>
+ <version>1.2.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-openstack</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>${hadoop.deps.scope}</scope>
+ </dependency>
+ <!--dependency>
+ <groupId>com.ibm.swift</groupId>
+ <artifactId>swift-driver</artifactId>
+ <version>1.0.0</version>
+ <scope>${hadoop.deps.scope}</scope>
+ </dependency-->
+ <dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>${avro.version}</version>
@@ -1310,9 +1327,9 @@
<dependency>
<groupId>${hive.group}</groupId>
<artifactId>hive-exec</artifactId>
-<!--
- <classifier>core</classifier>
--->
+ <!--
+ <classifier>core</classifier>
+ -->
<version>${hive.version}</version>
<scope>${hive.deps.scope}</scope>
<exclusions>
@@ -1337,7 +1354,7 @@
</exclusion>
<!-- excluded dependencies & transitive.
- Some may be needed to be explicitly included-->
+ Some may be needed to be explicitly included-->
<exclusion>
<groupId>ant</groupId>
<artifactId>ant</artifactId>
@@ -1621,8 +1638,8 @@
</dependency>
<!-- hive shims pulls in hive 0.23 and a transitive dependency of the Hadoop version
- Hive was built against. This dependency cuts out the YARN/hadoop dependency, which
- is needed by Hive to submit work to a YARN cluster.-->
+ Hive was built against. This dependency cuts out the YARN/hadoop dependency, which
+ is needed by Hive to submit work to a YARN cluster.-->
<dependency>
<groupId>${hive.group}</groupId>
<artifactId>hive-shims</artifactId>
@@ -1761,7 +1778,7 @@
<artifactId>janino</artifactId>
</exclusion>
<!-- hsqldb interferes with the use of derby as the default db
- in hive's use of datanucleus.
+ in hive's use of datanucleus.
-->
<exclusion>
<groupId>org.hsqldb</groupId>
@@ -1987,8 +2004,8 @@
<argLine>-Xmx3g -Xss4096k -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=512m</argLine>
<environmentVariables>
<!--
- Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes
- launched by the tests have access to the correct test-time classpath.
+ Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes
+ launched by the tests have access to the correct test-time classpath.
-->
<SPARK_DIST_CLASSPATH>${test_classpath}</SPARK_DIST_CLASSPATH>
<SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
@@ -2026,8 +2043,8 @@
<stderr/>
<environmentVariables>
<!--
- Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes
- launched by the tests have access to the correct test-time classpath.
+ Setting SPARK_DIST_CLASSPATH is a simple way to make sure any child processes
+ launched by the tests have access to the correct test-time classpath.
-->
<SPARK_DIST_CLASSPATH>${test_classpath}</SPARK_DIST_CLASSPATH>
<SPARK_PREPEND_CLASSES>1</SPARK_PREPEND_CLASSES>
@@ -2225,8 +2242,8 @@
</plugin>
<!--
- The shade plug-in is used here to create effective pom's (see SPARK-3812), and also
- remove references from the shaded libraries from artifacts published by Spark.
+ The shade plug-in is used here to create effective pom's (see SPARK-3812), and also
+ remove references from the shaded libraries from artifacts published by Spark.
-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
@@ -2262,8 +2279,8 @@
<shadedPattern>org.spark-project.guava</shadedPattern>
<excludes>
<!--
- These classes cannot be relocated, because the Java API exposes the
- "Optional" type; the others are referenced by the Optional class.
+ These classes cannot be relocated, because the Java API exposes the
+ "Optional" type; the others are referenced by the Optional class.
-->
<exclude>com/google/common/base/Absent*</exclude>
<exclude>com/google/common/base/Function</exclude>
@@ -2373,8 +2390,8 @@
<profiles>
<!--
- This profile is enabled automatically by the sbt built. It changes the scope for the guava
- dependency, since we don't shade it in the artifacts generated by the sbt build.
+      This profile is enabled automatically by the sbt build. It changes the scope for the guava
+ dependency, since we don't shade it in the artifacts generated by the sbt build.
-->
<profile>
<id>sbt</id>
@@ -2451,7 +2468,7 @@
<!-- A series of build profiles where customizations for particular Hadoop releases can be made -->
<!-- Hadoop-a.b.c dependencies can be found at
- http://hadoop.apache.org/docs/ra.b.c/hadoop-project-dist/hadoop-common/dependency-analysis.html
+ http://hadoop.apache.org/docs/ra.b.c/hadoop-project-dist/hadoop-common/dependency-analysis.html
-->
<profile>
@@ -2469,7 +2486,7 @@
<profile>
<id>hadoop-2.2</id>
- <!-- SPARK-7249: Default hadoop profile. Uses global properties. -->
+ <!-- SPARK-7249: Default hadoop profile. Uses global properties. -->
</profile>
<profile>
@@ -2557,9 +2574,9 @@
</profile>
<!--
- These empty profiles are available in some sub-modules. Declare them here so that
- maven does not complain when they're provided on the command line for a sub-module
- that does not have them.
+ These empty profiles are available in some sub-modules. Declare them here so that
+ maven does not complain when they're provided on the command line for a sub-module
+ that does not have them.
-->
<profile>
<id>flume-provided</id>
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index 9bff1e5..159163f 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -65,6 +65,10 @@
<artifactId>spark-test-tags_${scala.binary.version}</artifactId>
</dependency>
<dependency>
+ <groupId>com.databricks</groupId>
+ <artifactId>spark-csv_2.10</artifactId>
+ </dependency>
+ <dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-column</artifactId>
</dependency>
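
Note: the spark-csv 1.2.0 dependency added to sql/core (and pinned in the root pom above) makes the CSV data source available on the SQL path. A hedged sketch of standard spark-csv usage against a Swift path — wiring DataFrame filters through to setPredicate is presumably handled elsewhere in this patch series, not in the hunks shown here:

    // Standard spark-csv 1.2.0 read; path and column names are placeholders.
    val df = sqlContext.read
      .format("com.databricks.spark.csv")
      .option("header", "true")
      .option("inferSchema", "true")
      .load("swift://container.mycloud/data.csv")
    df.filter(df("col2") > 100).select("col1", "col3").count()
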