
Commit

merge in changes
mhamilton723 committed Jun 21, 2021
1 parent 4601e72 commit 7cd5c40
Showing 23 changed files with 55 additions and 55 deletions.
11 changes: 2 additions & 9 deletions build.sbt
@@ -1,24 +1,17 @@
import java.io.{File, PrintWriter}
import java.io.File
import java.net.URL

import org.apache.commons.io.FileUtils
import sbt.ExclusionRule

import scala.xml.{Node => XmlNode, NodeSeq => XmlNodeSeq, _}
import scala.xml.transform.{RewriteRule, RuleTransformer}
import BuildUtils._
import CodegenPlugin.autoImport.pythonizedVersion
import sbt.Project.projectToRef
import xerial.sbt.Sonatype._

val condaEnvName = "mmlspark"
name := "mmlspark"
organization := "com.microsoft.ml.spark"
scalaVersion := "2.12.10"
val sparkVersion = "3.1.2"
name := "mmlspark"
ThisBuild / organization := "com.microsoft.ml.spark"
ThisBuild / scalaVersion := "2.12.10"
val sparkVersion = "3.0.1"

val scalaMajorVersion = 2.12
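
The build.sbt hunk above moves organization and scalaVersion from project-level settings to ThisBuild scope, so every sub-project in the build picks them up automatically. A minimal sbt sketch of that scoping (the "core" sub-project and its dependency line are illustrative, not taken from this repository's build):

// ThisBuild-scoped keys become the default for all projects in the build,
// so sub-projects no longer need to repeat organization / scalaVersion.
ThisBuild / organization := "com.microsoft.ml.spark"
ThisBuild / scalaVersion := "2.12.10"

val sparkVersion = "3.0.1"

// Hypothetical sub-project: per-project keys such as name stay project-scoped.
lazy val core = (project in file("core"))
  .settings(
    name := "mmlspark-core",
    libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion % "provided"
  )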

@@ -59,11 +59,11 @@ object RESTHelpers {
response
} else {
val requestBodyOpt = Try(request match {
case er: HttpEntityEnclosingRequestBase => IOUtils.toString(er.getEntity.getContent)
case er: HttpEntityEnclosingRequestBase => IOUtils.toString(er.getEntity.getContent, "UTF-8")
case _ => ""
}).get

val responseBodyOpt = Try(IOUtils.toString(response.getEntity.getContent)).getOrElse("")
val responseBodyOpt = Try(IOUtils.toString(response.getEntity.getContent, "UTF-8")).getOrElse("")

throw new RuntimeException(
s"Failed: " +

@@ -32,15 +32,15 @@ object SpeechAPI {
using(Client.execute(request)) { response =>
if (!response.getStatusLine.getStatusCode.toString.startsWith("2")) {
val bodyOpt = request match {
case er: HttpEntityEnclosingRequestBase => IOUtils.toString(er.getEntity.getContent)
case er: HttpEntityEnclosingRequestBase => IOUtils.toString(er.getEntity.getContent, "UTF-8")
case _ => ""
}
throw new RuntimeException(
s"Failed: response: $response " +
s"requestUrl: ${request.getURI}" +
s"requestBody: $bodyOpt")
}
IOUtils.toString(response.getEntity.getContent)
IOUtils.toString(response.getEntity.getContent, "UTF-8")
.parseJson.asJsObject().fields("Signature").compactPrint
}.get
})

@@ -35,15 +35,15 @@ object FaceUtils extends CognitiveKey {
using(Client.execute(request)) { response =>
if (!response.getStatusLine.getStatusCode.toString.startsWith("2")) {
val bodyOpt = request match {
case er: HttpEntityEnclosingRequestBase => IOUtils.toString(er.getEntity.getContent)
case er: HttpEntityEnclosingRequestBase => IOUtils.toString(er.getEntity.getContent, "UTF-8")
case _ => ""
}
throw new RuntimeException(
s"Failed: response: $response " +
s"requestUrl: ${request.getURI}" +
s"requestBody: $bodyOpt")
}
IOUtils.toString(response.getEntity.getContent)
IOUtils.toString(response.getEntity.getContent, "UTF-8")
}.get
})
}

@@ -14,7 +14,8 @@ import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing}
import org.apache.http.client.methods.HttpDelete
import org.apache.spark.ml.util.MLReadable
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.functions.{lit, udf, col, split}
import org.apache.spark.sql.functions.{col, lit, split, udf}

import scala.collection.mutable
import scala.concurrent.blocking

@@ -31,19 +31,24 @@ object SparkSessionFactory {
if (File.separator != "\\") path
else path.replaceFirst("[A-Z]:", "").replace("\\", "/")
}

def currentDir(): String = System.getProperty("user.dir")

def getSession(name: String, logLevel: String = "WARN",
numRetries: Int = 1, numCores: Option[Int] = None): SparkSession = {
val cores = numCores.map(_.toString).getOrElse("*")
val conf = new SparkConf()
.setAppName(name)
.setMaster(if (numRetries == 1){s"local[$cores]"}else{s"local[$cores, $numRetries]"})
.set("spark.logConf", "true")
.set("spark.sql.shuffle.partitions", "20")
.set("spark.driver.maxResultSize", "6g")
.set("spark.sql.warehouse.dir", SparkSessionFactory.LocalWarehousePath)
.set("spark.sql.crossJoin.enabled", "true")
.setAppName(name)
.setMaster(if (numRetries == 1) {
s"local[$cores]"
} else {
s"local[$cores, $numRetries]"
})
.set("spark.logConf", "true")
.set("spark.sql.shuffle.partitions", "20")
.set("spark.driver.maxResultSize", "6g")
.set("spark.sql.warehouse.dir", SparkSessionFactory.LocalWarehousePath)
.set("spark.sql.crossJoin.enabled", "true")
val sess = SparkSession.builder()
.config(conf)
.getOrCreate()
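
The setMaster reformatting above is behavior-preserving: with numRetries == 1 the master URL is local[$cores], otherwise local[$cores, $numRetries], which turns on task retries in Spark's local mode. A hedged usage sketch of the helper (the suite name and retry count here are made up for illustration):

// Sketch only: obtaining a test session through the factory shown above.
// "local[*, 3]" gives each task up to 3 attempts in local mode instead of
// failing the whole job on the first error.
val spark = SparkSessionFactory.getSession("ExampleSuite", logLevel = "WARN", numRetries = 3)
val df = spark.range(10).toDF("n")
df.show()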

@@ -3,6 +3,8 @@

package com.microsoft.ml.spark.core.test.base

import java.nio.file.Files

import breeze.linalg.norm.Impl
import breeze.linalg.{norm, DenseVector => BDV}
import breeze.math.Field
@@ -17,7 +19,6 @@ import org.scalatest._
import org.scalatest.concurrent.TimeLimits
import org.scalatest.time.{Seconds, Span}

import java.nio.file.Files
import scala.concurrent._
import scala.reflect.ClassTag

@@ -9,13 +9,13 @@ import java.nio.file.Files

import com.microsoft.ml.spark.codegen.CodegenConfig
import com.microsoft.ml.spark.core.env.FileUtilities
import com.microsoft.ml.spark.core.test.base.TestBase
import org.apache.commons.io.FileUtils
import org.apache.spark.ml._
import org.apache.spark.ml.param.{DataFrameEquality, ExternalPythonWrappableParam, ParamPair}
import org.apache.spark.ml.util.{MLReadable, MLWritable}
import org.apache.spark.sql.DataFrame
import com.microsoft.ml.spark.codegen.GenerationUtils._
import com.microsoft.ml.spark.core.test.base.TestBase

/**
* Class for holding test information, call by name to avoid unnecessary computations in test generations

@@ -4,13 +4,14 @@
package com.microsoft.ml.spark.core.test.fuzzing

import com.microsoft.ml.spark.core.contracts.{HasFeaturesCol, HasInputCol, HasLabelCol, HasOutputCol}
import com.microsoft.ml.spark.core.test.base.TestBase
import com.microsoft.ml.spark.core.utils.JarLoadingUtils
import org.apache.spark.ml._
import org.apache.spark.ml.param._
import org.apache.spark.ml.util.{MLReadable, MLWritable}

import java.lang.reflect.ParameterizedType

import com.microsoft.ml.spark.core.test.base.TestBase

import scala.language.existentials

/** Tests to validate fuzzing of modules. */

@@ -6,7 +6,6 @@ package com.microsoft.ml.spark.explainers.split1
import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV}
import breeze.stats.distributions.RandBasis
import breeze.stats.{mean, stddev}
import com.microsoft.ml.spark.core.test.base.TestBase
import com.microsoft.ml.spark.explainers.BreezeUtils._
import com.microsoft.ml.spark.explainers._
import com.microsoft.ml.spark.io.image.ImageUtils
@@ -17,8 +16,9 @@ import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
import org.apache.spark.sql.types._
import org.scalactic.{Equality, TolerantNumerics}
import org.scalatest.Matchers._

import java.nio.file.{Files, Paths}

import com.microsoft.ml.spark.core.test.base.TestBase
import javax.imageio.ImageIO

class SamplerSuite extends TestBase {

@@ -13,7 +13,7 @@ import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject}
import org.apache.commons.io.FileUtils
import org.apache.spark.ml.PipelineModel
import org.apache.spark.ml.feature.StringIndexer
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vectors, Vector}
import org.apache.spark.ml.linalg.{DenseVector, SparseVector, Vector, Vectors}
import org.apache.spark.ml.util.MLReadable
import org.apache.spark.sql._

@@ -3,13 +3,13 @@

package com.microsoft.ml.spark.flaky

import com.microsoft.ml.spark.core.test.base.{SparkSessionFactory, TestBase, TimeLimitedFlaky}
import com.microsoft.ml.spark.core.test.base.{TestBase, TimeLimitedFlaky}
import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing}
import com.microsoft.ml.spark.stages.PartitionConsolidator
import org.apache.spark.ml.util.MLReadable
import org.apache.spark.sql.catalyst.encoders.RowEncoder
import org.apache.spark.sql.types.{DoubleType, StructType}
import org.apache.spark.sql.{DataFrame, Dataset, Row, SparkSession}
import org.apache.spark.sql.{DataFrame, Dataset, Row}
import org.scalatest.Assertion

class PartitionConsolidatorSuite extends TransformerFuzzing[PartitionConsolidator] with TimeLimitedFlaky {

@@ -8,9 +8,9 @@ import java.net.URL

import com.microsoft.ml.spark.build.BuildInfo
import com.microsoft.ml.spark.core.env.FileUtilities
import com.microsoft.ml.spark.core.test.base.TestBase
import org.apache.spark.ml.linalg.DenseVector
import org.apache.spark.sql.{DataFrame, SparkSession}
import com.microsoft.ml.spark.core.test.base.TestBase
import com.microsoft.ml.spark.io.IOImplicits.dfrToDfre
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.functions.col

@@ -5,7 +5,6 @@ package com.microsoft.ml.spark.io.split2

import java.io.File
import java.util.UUID
import java.util.concurrent.TimeUnit

import com.microsoft.ml.spark.core.test.base.{Flaky, TestBase}
import com.microsoft.ml.spark.io.IOImplicits._
@@ -15,7 +14,6 @@ import org.apache.spark.sql.streaming.{DataStreamReader, StreamingQuery, Trigger
import org.apache.spark.sql.types.BinaryType

import scala.concurrent.Await
import scala.concurrent.duration.Duration

// scalastyle:off magic.number
class ContinuousHTTPSuite extends TestBase with Flaky with HTTPTestUtils {

@@ -354,7 +354,7 @@ class DistributedHTTPSuite extends TestBase with Flaky with HTTPTestUtils {

processes.foreach { p =>
p.waitFor
val error = IOUtils.toString(p.getErrorStream)
val error = IOUtils.toString(p.getErrorStream, "UTF-8")
assert(error === "")
}
}

@@ -86,7 +86,7 @@ object DatabricksUtilities extends HasHttpClient {
if (response.getStatusLine.getStatusCode != 200) {
throw new RuntimeException(s"Failed: response: $response")
}
IOUtils.toString(response.getEntity.getContent).parseJson
IOUtils.toString(response.getEntity.getContent, "UTF-8").parseJson
}.get
})
}
@@ -102,7 +102,7 @@ object DatabricksUtilities extends HasHttpClient {
val entity = IOUtils.toString(response.getEntity.getContent, "UTF-8")
throw new RuntimeException(s"Failed:\n entity:$entity \n response: $response")
}
IOUtils.toString(response.getEntity.getContent).parseJson
IOUtils.toString(response.getEntity.getContent, "UTF-8").parseJson
}.get
})
}

@@ -6,7 +6,6 @@ package com.microsoft.ml.spark.train
import java.io.File

import com.microsoft.ml.spark.core.schema.SchemaConstants
import com.microsoft.ml.spark.core.test.base.TestBase
import com.microsoft.ml.spark.core.test.benchmarks.Benchmarks
import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject}
import com.microsoft.ml.spark.featurize.ValueIndexer
@@ -18,6 +17,7 @@ import org.apache.spark.mllib.evaluation.{BinaryClassificationMetrics, Multiclas
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{DataFrame, Row}
import com.microsoft.ml.spark.codegen.GenerationUtils
import com.microsoft.ml.spark.core.test.base.TestBase

object ClassifierTestUtils {

@@ -63,7 +63,7 @@ private[spark] class HDFSRepo[S <: Schema](val uri: URI, val hconf: HadoopConf)
.filter(status =>
status.isFile & status.getPath.toString.endsWith(".meta"))
.map(status =>
IOUtils.toString(fs.open(status.getPath).getWrappedStream))
IOUtils.toString(fs.open(status.getPath).getWrappedStream, "UTF-8"))

schemaStrings.map(s => s.parseJson.convertTo[S]).toList
}
@@ -94,7 +94,7 @@ private[spark] class HDFSRepo[S <: Schema](val uri: URI, val hconf: HadoopConf)
val newSchema = schema.updateURI(location)
val schemaPath = new Path(location.getPath + ".meta")
val osSchema = fs.create(schemaPath)
val schemaIs = IOUtils.toInputStream(newSchema.toJson.prettyPrint)
val schemaIs = IOUtils.toInputStream(newSchema.toJson.prettyPrint, "UTF-8")
try {
HUtils.copyBytes(schemaIs, osSchema, hconf)
} finally {
@@ -130,9 +130,9 @@ private[spark] class DefaultModelRepo(val baseURL: URL) extends Repository[Model
val url = join(baseURL, "MANIFEST")
val manifestStream = toStream(url)
try {
val modelStreams = IOUtils.readLines(manifestStream).asScala.map(fn => toStream(join(baseURL, fn)))
val modelStreams = IOUtils.readLines(manifestStream, "UTF-8").asScala.map(fn => toStream(join(baseURL, fn)))
try {
modelStreams.map(s => IOUtils.toString(s).parseJson.convertTo[ModelSchema])
modelStreams.map(s => IOUtils.toString(s, "UTF-8").parseJson.convertTo[ModelSchema])
} finally {
modelStreams.foreach(_.close())
}

@@ -3,16 +3,16 @@

package com.microsoft.ml.spark.explainers

import java.io.File
import java.net.URL

import com.microsoft.ml.spark.core.test.base.TestBase
import com.microsoft.ml.spark.image.{ImageFeaturizer, NetworkUtils}
import com.microsoft.ml.spark.image.{ImageFeaturizer, TrainedCNTKModelUtils}
import com.microsoft.ml.spark.io.IOImplicits._
import org.apache.commons.io.FileUtils
import org.apache.spark.sql.DataFrame

import java.io.File
import java.net.URL

abstract class ImageExplainersSuite extends TestBase with NetworkUtils {
abstract class ImageExplainersSuite extends TestBase with TrainedCNTKModelUtils {
lazy val greyhoundImageLocation: String = {
val loc = "/tmp/greyhound.jpg"
val f = new File(loc)
@@ -1,13 +1,13 @@
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.explainers.split3
package com.microsoft.ml.spark.explainers.split2

import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing}
import com.microsoft.ml.spark.explainers.BreezeUtils._
import com.microsoft.ml.spark.explainers.{ImageExplainersSuite, ImageFormat, ImageLIME, LocalExplainer}
import com.microsoft.ml.spark.lime.SuperpixelData
import com.microsoft.ml.spark.io.IOImplicits._
import com.microsoft.ml.spark.lime.SuperpixelData
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.util.MLReadable
import org.apache.spark.sql.functions.col
@@ -1,11 +1,11 @@
// Copyright (C) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License. See LICENSE in project root for information.

package com.microsoft.ml.spark.explainers.split2
package com.microsoft.ml.spark.explainers.split3

import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing}
import com.microsoft.ml.spark.explainers.{ImageExplainersSuite, ImageFormat, ImageSHAP, LocalExplainer}
import com.microsoft.ml.spark.explainers.BreezeUtils._
import com.microsoft.ml.spark.explainers.{ImageExplainersSuite, ImageFormat, ImageSHAP, LocalExplainer}
import com.microsoft.ml.spark.lime.SuperpixelData
import org.apache.spark.ml.linalg.Vector
import org.apache.spark.ml.util.MLReadable

@@ -134,8 +134,9 @@ object DatasetUtils {

/**
* Sample the first several rows to determine whether to construct sparse or dense matrix in lightgbm native code.
* @param rowsIter Iterator of rows.
* @param schema The schema.
*
* @param rowsIter Iterator of rows.
* @param schema The schema.
* @param columnParams The column parameters.
* @return A reconstructed iterator with the same original rows and whether the matrix should be sparse or dense.
*/
@@ -158,7 +159,7 @@
}

def addFeaturesToChunkedArray(featuresChunkedArrayOpt: Option[doubleChunkedArray], numCols: Int,
rowAsDoubleArray: Array[Double]): Unit = {
rowAsDoubleArray: Array[Double]): Unit = {
featuresChunkedArrayOpt.foreach { featuresChunkedArray =>
rowAsDoubleArray.foreach { doubleVal =>
featuresChunkedArray.add(doubleVal)
