Scala 学习笔记

Posted by Daniel on December 6, 2017

本笔记部分是根据Scala Collections 提示和技巧的博客整理的 Scala 知识点。

1、高阶函数

case class Email(subject: String, text: String, sender: String, recipient: String)

def complement[A](predicate: A => Boolean) = (a: A) => !predicate(a)

def any[A](predicates: (A => Boolean)*): A => Boolean =
          a => predicates.exists(pred => pred(a))

def none[A](predicates: (A => Boolean)*) = complement(any(predicates: _*))

def every[A](predicates: (A => Boolean)*) = none(predicates.view.map(complement(_)): _*)

type EmailFilter = Email => Boolean

def newMailsForUser(mails: Seq[Email], f: EmailFilter): Seq[Email] = mails.filter(f)

// 产生EmailFilter的工厂方法
val sentByOneOf: Set[String] => EmailFilter =
          senders =>
            email => senders.contains(email.sender)

val notSentByAnyOf: Set[String] => EmailFilter =
        sentByOneOf.andThen(complement(_))

type SizeChecker = Int => Boolean

val minimumSize1: Int => EmailFilter =
          n =>
            email => email.text.size >= n

val sizeConstraint: SizeChecker => EmailFilter =
          f =>
            email => f(email.text.size)

val minimumSize: Int => EmailFilter =
          n =>
            sizeConstraint(_ >= n)

val maximumSize: Int => EmailFilter =
          n =>
            sizeConstraint(_ <= n)
val mails = Email(
        subject = "It's me again, your stalker friend!",
        text = "Hello my friend! How are you?",
        sender = "johndoe@example.com",
        recipient = "me@example.com") :: Nil

val filter: EmailFilter = every(
            notSentByAnyOf(Set("johndoe@example.com")),
            minimumSize(100),
            maximumSize(10000)
          )

newMailsForUser(mails, filter) // returns an empty list

2、模式匹配表达式

case class Player(name: String, score: Int)
def message(player: Player) = player match {
  case Player(_, score) if score > 100000 => "Get a job, dude!"
  case Player(name, _) => "Hey, $name, nice to see you again!"
}
def printMessage(player: Player) = println(message(player))

3、for 语句中的模式

def gameResults(): Seq[(String, Int)] =
  ("Daniel", 3500) :: ("Melissa", 13000) :: ("John", 7000) :: Nil
def hallOfFame = for {
    (name, score) <- gameResults()
    if (score > 5000)
  } yield name

4、偏函数

def wordsWithoutOutliers(wordFrequencies: Seq[(String, Int)]): Seq[String] =
  wordFrequencies.filter { case (_, freq) if freq > 3 && freq < 25 } 
    .map {case (w, _) => w}

// 上面的例子中,先过滤序列后对剩下的元素映射,需要遍历2次序列。
// collect可以将给定的偏函数应用到序列每一个元素,首先检查偏函数在其上面是否有定义,如果没有定义直接忽略,否则将偏函数应用到这个元素上。

def collect[B](pf: PartialFunction[A, B]): Seq[B]

val pf = new PartialFunction[(String, Int), String] {
  def apply(wordFrequency: (String, Int)) = wordFrequency match {
    case (word, freq) if freq > 3 && freq < 25 => word
  }
  def isDefinedAt(wordFrequency: (String, Int)) = wordFrequency match {
    case (word, freq) if freq > 3 && freq < 25 => true
    case _ => false
  }
}

def wordsWithoutOutliers(wordFrequencies: Seq[(String, Int)]): Seq[String] =
  wordFrequencies.collect { case (word, freq) if freq > 3 && freq < 25 => word }

// 偏函数还可以实现函数式的责任链模式  

5、判断Seq中检查一个元素是否存在

seq.filter(_ == x).headOption != None
seq.find(_ == x) != None
seq.find(_ == x).isDefined
seq.exists(_ == x)
seq.contains(x)

6、Sequence 使用

// 创建集合

Seq.empty[T]
// 对于数组,优先使用length而不是size

array.length
// 不要对检查empty的属性取反

seq.nonEmpty
seq.isEmpty

seq.lengthCompare(n) > 0
seq.lengthCompare(n) < 0
// 比较数组

array1.sameElements(array2)
// seq 和 set 比较

seq.toSet == set
// 有序集合比较

seq1 == seq2
// 第一个和最后一个元素

seq.head
seq.headOption
seq.last
seq.lastOption
// 下标的range

seq.indices
// 元素是否存在

seq.exists(p)
!seq.exists(p)
// 不要对断言取反

seq.filterNot(p)
// 查找元素

seq.find(p)
// 反序

seq.sorted(Ordering[T].reverse)
seq.sortBy(_.property)(Ordering[T].reverse)
seq.sortWith(!f(_, _))
// 合并多个函数

seq.filter(x => p1(x) && p2(x))
seq.map(f.andThen(g)) // seq.map(f).map(g)


7、Option 使用

// 不要使用None和Option比较

option.isEmpty
option.isDefined

case class User(id: Int, age: Int, gender: Option[STring])

// Option 模式匹配

val user = User(2, 30, None)
user.gender.getOrElse("not specified")

val gender = user.gender match {
  case Some(gender) => gender
  case None => "not specified"
}

// 不要检查值的存在性再处理值,Option值存在的时候用foreach执行某个副作用
// Some foreach 被调用一次, None不会被调用

user.gender.foreach { v =>
    println(v)
}
// 执行映射

val gender = UserRepository.findById(1).map(_.age) // age is Some(33)
val gender = UserRepository.findById(1).map(_.gender) // gender is Option[Option[String]]
val gender = UserRepository.findById(1).flatMap(_.gender) // gender is Option[String]

option.fold(z)(f) // option.map(f).getOrElse(z)

8、Map 使用

// filterKeys mapValues

map.filter(p(_._1))  map.map(f(_._2))
// 移除seq中的key

map -- seq
// 赋值

map += x -> y
map1 ++= map2
map -= x
map --= seq

9、设置double的位数

val formattedVal = "%.6f" format val
formattedVal.toDouble

10、类型转化

def toInt(s: String): Option[Int] = {
  try {
    Some(s.toInt)
  } catch {
    case e: Exception => None
  }
}

import scala.util.{Try, Success, Failure}
def makeInt(s: String): Try[Int] = Try(s.trim.toInt)

11、不可变数组和可变数组

val fruits = Array("Apple", "Banana", "Orange")
val fruits = new Array[String](3)
fruits(0) = "Apple"
fruits(1) = "Banana"
fruits(2) = "Orange"

import scala.collection.mutable.ArrayBuffer
import scala.collection.mutable.ArrayBuilder

val fruits = ArrayBuffer[String]()
fruits += "Apple"
fruits += "Banana"
fruits.toArray()

val fruits = ArrayBuilder.make[String]
fruits += "Apple"
fruits += "Banana"
fruits.result()

val fruits = new ListBuffer[String]
fruits.result()

12、foreach循环

var sum = 0
val x = List(1,2,3)
x.foreach(sum += _)

val names = Vector("Bob", "Fred", "Joe", "Julia", "Kim")
for (name <- names if name.startsWith("J"))
    println(name)

val m1 = Map("fname" -> "Al", "lname" -> "Alexander")
for ((k,v) <- m1) printf("key: %s, value: %s\n", k, v)
m1 foreach (x => println (x._1 + "-->" + x._2))
m1 foreach {case (key, value) => println (key + "-->" + value)}    

13、数组分片

// def sliding(size: Int, step: Int): Iterator[Iterable][A]]

val nums = (1 to 5).toArray
nums.sliding(2).toList
nums.sliding(2,2).toList
nums.sliding(2,3).toList

// 当size和step相等的时候,可以简化使用grouped方法
nums.grouped(2).toList

val x = List(15, 10, 5, 8, 20, 12)
val (a, b) = x.partition(_ > 10)
val groups = x.groupBy(_ > 10)

14、Future

import scala.concurrent.{Await, Future}
import scala.util.{Failure, Success}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.Duration

def concurrentCount(df1: DataFrame, df2: DataFrame): Future[(Long, Long)] = {
    val count1Future: Future[Long] = Future {
        df1.count()
    }
    val count2Future: Future[Long] = Future {
        df2.count()
    }
    val count: Future[(Long, Long)] = for {
        c1 <- count1Future
        c2 <- count2Future
    } yield (c1, c2)
    count
}

val count: Future[(Long, Long)] = concurrentCount(positiveDF, negativeDF)
val (posCount, negCount) = Await.result(count, Duration.Inf)

val count = concurrentCount(trainDF, validateDF)
count.onComplete {
    case Success((trainCount, validateCount)) => println(s"train sample count: $trainCount validate sample count: $validateCount")
    case Failure(ex) => println(s"train sample count error")
}

15、异步事件处理

import java.util.Timer
import java.util.TimerTask
import scala.concurrent.{Await, Future}
import scala.util.{Failure, Success}
import scala.concurrent.ExecutionContext.Implicits.global
import scala.concurrent.duration.Duration
 
val timer = new Timer

def delayedSuccess[T](secs: Int, value: T): Future[T] = {
    val result = Promise[T]
    timer.schedule(new TimerTask() {
        def run() = {
            result.success(value)
        }
    }, secs * 1000)
    result.future
}

def delayedFailure(secs: Int, msg: String): Future[Int] = {
    val result = Promise[Int]
    timer.schedule(new TimerTask() {
        def run() = {
            result.failure(new IllegalArgumentException(msg))
        }
    }, secs * 1000)
    result.future
}

def task1(input: Int) = TimedEvent.delayedSuccess(1, input + 1)
def task2(input: Int) = TimedEvent.delayedSuccess(2, input + 2)
def task3(input: Int) = TimedEvent.delayedSuccess(3, input + 3)
def task4(input: Int) = TimedEvent.delayedSuccess(1, input + 4)

// 阻塞等待任务执行

def runBlocking() = {
    val v1 = Await.result(task1(1), Duration.Inf)
    val future2 = task2(v1)
    val future3 = task3(v1)
    val v2 = Await.result(future2, Duration.Inf)
    val v3 = Await.result(future3, Duration.Inf)
    val v4 = Await.result(task4(v2 + v3), Duration.Inf)
    val result = Promise[Int]
    result.success(v4)
    result.future
}

// 使用 onSuccess() 处理事件的完成

def runOnSuccess() = {
    val result = Promise[Int]
    task1(1).onSuccess(v => v match {
        case v1 => {
            val a = task2(v1)
            val b = task3(v1)
            a.onSuccess(v => v match {
                case v2 =>
                    b.onSuccess(v => v match {
                        case v3 => task4(v2 + v3).onSuccess(v4 => v4 match {
                            case x => result.success(x)
                })
                })
            })
        }
    })
    result.future
}

// 使用 flatMap() 处理事件的完成

def runFlatMap() = {
    task1(1) flatMap {v1 =>
        val a = task2(v1)
        val b = task3(v1)
        a flatMap { v2 =>
            b flatMap { 
                v3 => task4(v2 + v3) 
            }
        }
    }
}