Spark PruneDependency 依赖关系 RangePartitioner
Represents a dependency between the PartitionPruningRDD and its parent. In this
case, the child RDD contains a subset of partitions of the parent.
<iframe width="800" height="500" src="//player.bilibili.com/player.html?aid=37442139&cid=65838008&page=4" scrolling="no" border="0" frameborder="no" framespacing="0" allowfullscreen="true"> </iframe>
List(("a", 2), ("d", 1), ("b", 8), ("d", 3))
package com .opensource .bigdata .spark .local .rdd .operation .dependency .narrow .n_03_pruneDependency .n_02_filterByRange
import com .opensource .bigdata .spark .local .rdd .operation .base .BaseScalaSparkContext
object Run extends BaseScalaSparkContext {

  /**
   * Demonstrates PruneDependency via `filterByRange`.
   *
   * `sortByKey` installs a RangePartitioner (ShuffledRDD); the subsequent
   * `filterByRange("a", "b")` can therefore prune parent partitions whose key
   * range cannot overlap ["a", "b"] instead of scanning every partition.
   */
  def main(args: Array[String]): Unit = {
    val sc = pre()
    // ParallelCollectionRDD with 2 partitions.
    // NOTE(review): string literals had stray spaces (" a", " d", ...) from the
    // original paste; they are restored to the keys shown in the article text.
    val rdd1 = sc.parallelize(List(("a", 2), ("d", 1), ("b", 8), ("d", 3)), 2)
    val rdd1Sort = rdd1.sortByKey() // ShuffledRDD with a RangePartitioner
    val rdd2 = rdd1Sort.filterByRange("a", "b") // MapPartitionsRDD over a PartitionPruningRDD
    println("rdd\n" + rdd2.collect().mkString("\n"))
    sc.stop()
  }
}