Oryx 2 default configuration file
2016-04-20 16:50
```
# Copyright (c) 2014, Cloudera, Inc. All Rights Reserved.
#
# Cloudera, Inc. licenses this file to you under the Apache License,
# Version 2.0 (the "License"). You may not use this file except in
# compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for
# the specific language governing permissions and limitations under the
# License.

oryx = {

  # Optional string identifying the entire Oryx instance. Without it, Oryx has no
  # longer-term identity with external systems like Kafka. For example, without this set,
  # on startup the layers read from only the latest available input. With this set,
  # Kafka can load the last offset read and let the layers resume reading where they left off.
  id = null

  # Configuration for the Kafka input topic
  input-topic = {

    # Comma-separated list of Kafka brokers, as host1:port1(,host2:port2,...)
    broker = "localhost:9092"

    lock = {
      # Comma-separated list of Zookeeper masters, as host1:port1(,host2:port2,...)
      # Note that if you need to specify a chroot, then weirdly, it should only appear
      # at the end: host1:port1,host2:port2/chroot, not host1:port1/chroot,host2:port2/chroot
      master = "localhost:2181"
    }

    message = {
      # Input topic
      topic = "OryxInput"

      # Key/message classes that the framework receives, respectively
      key-class = "java.lang.String"
      message-class = "java.lang.String"

      # Decoder classes used to read/write key/message classes
      key-decoder-class = "kafka.serializer.StringDecoder"
      message-decoder-class = "kafka.serializer.StringDecoder"
    }

  }

  # Configuration for the Kafka model update topic
  update-topic = {

    # Comma-separated list of Kafka brokers, as host1:port1(,host2:port2,...)
    # Can be null to disable publishing to an update topic
    broker = "localhost:9092"

    lock = {
      # Comma-separated list of Zookeeper masters, as host1:port1(,host2:port2,...)
      # Note that if you need to specify a chroot, then weirdly, it should only appear
      # at the end: host1:port1,host2:port2/chroot, not host1:port1/chroot,host2:port2/chroot
      # Can be null to disable publishing to an update topic
      master = "localhost:2181"
    }

    message = {
      # Update topic
      # Can be null to disable publishing to an update topic
      topic = "OryxUpdate"

      # Decoder/encoder classes used to read/write key/message classes
      decoder-class = "kafka.serializer.StringDecoder"
      encoder-class = "kafka.serializer.StringEncoder"

      # Max size in bytes of message to write to the update topic. Don't change this unless you
      # know what you're doing. PMML models larger than this will be passed a location on HDFS,
      # for example. This should match the max.message.bytes configured for the update topic.
      # This value can't be larger than about 64MB in any event.
      max-size = 16777216
    }

  }

  # Default Spark key-value pairs used in batch, streaming
  default-streaming-config = {
    spark.io.compression.codec = "lzf"
    spark.speculation = true
    spark.logConf = true
    spark.serializer = "org.apache.spark.serializer.KryoSerializer"
    spark.ui.showConsoleProgress = false
  }

  # Batch layer configuration
  batch = {

    # Streaming framework configuration
    streaming = {

      # Spark Streaming master. If local[n], make sure n >= 2
      master = "yarn-client"

      # Interval between runs of the computation layer. Default: 6 hours
      generation-interval-sec = 21600

      # Number of executors to start. In YARN-based deployments, this is a
      # maximum, and fewer executors may be used when the process is idle if
      # dynamic-allocation is enabled
      num-executors = 4
      # Cores per executor
      executor-cores = 4
      # Memory per executor
      executor-memory = "2g"
      # Heap size for the Batch driver process.
      driver-memory = "1g"

      # Enable dynamic allocation? YARN-only and not always desirable for streaming
      dynamic-allocation = false

      # Spark config key-value pairs
      config = ${oryx.default-streaming-config}
    }

    # An implementation of com.cloudera.oryx.api.batch.BatchLayerUpdate
    # which specifies what is done with current and historical data to update a model
    update-class = null

    storage = {

      # Directory where historical data is stored. Can be local, or on HDFS, etc.
      data-dir = "file:/tmp/Oryx/data/"
      # Directory where models are output. Can be local, or on HDFS, etc.
      model-dir = "file:/tmp/Oryx/model/"

      # Writable classes used to persist key/message, respectively
      key-writable-class = "org.apache.hadoop.io.Text"
      message-writable-class = "org.apache.hadoop.io.Text"

      # Data older than this many hours may be automatically deleted. -1 means no maximum.
      max-age-data-hours = -1
    }

    # Configuration for the Spark UI
    ui = {
      # UI port
      port = 4040
    }
  }

  # Speed layer configuration
  speed = {

    # Streaming framework configuration
    streaming = {

      # Spark Streaming master. If local[n], make sure n >= 2
      master = "yarn-client"

      # See tuning rule of thumb above
      # Interval between runs of the computation layer in seconds. Default: 10 seconds
      generation-interval-sec = 10

      # Number of executors to start. In YARN-based deployments, this is a
      # maximum, and fewer executors may be used when the process is idle if
      # dynamic-allocation is enabled
      num-executors = 2
      # Cores per executor
      executor-cores = 4
      # Memory per executor
      executor-memory = "1g"
      # Heap size for the Speed driver process.
      driver-memory = "512m"

      # Enable dynamic allocation? YARN-only and not always desirable for streaming
      dynamic-allocation = false

      # Spark config key-value pairs
      config = ${oryx.default-streaming-config}
    }

    # Implementation of com.cloudera.oryx.api.speed.SpeedModelManager interface that produces
    # updates from a SpeedModel and stream of input
    model-manager-class = null

    # See doc for serving.min-model-load-fraction below
    min-model-load-fraction = 0.8

    # Configuration for the Spark UI
    ui = {
      # UI port
      port = 4040
    }
  }

  # Serving layer configuration
  serving = {

    # Memory to allocate for each serving layer instance. Must be in MB for now, ending with 'm'
    memory = "4000m"

    # Optional config when using YARN-based deployment
    yarn = {
      # Number of serving layers to run
      instances = 1
      # Cores to allocate for each serving layer instance
      cores = "4"
    }

    api = {
      # Default to use well-known HTTP port for Serving Layer
      port = 80
      # Default to use well-known HTTPS port for Serving Layer
      secure-port = 443

      # User name for connecting to the service, if required. If set, must be set with password.
      # If enabled, this will enable HTTP DIGEST authentication in the API.
      user-name = null
      # Password for connecting to the service, if required. If set, must be set with user-name.
      # If enabled, this will enable HTTP DIGEST authentication in the API.
      password = null

      # The keystore file containing the server's SSL keys. Only necessary when
      # accessing a server with temporary self-signed key, which is not trusted
      # by the Java SSL implementation.
      keystore-file = null
      # Password needed for keystore file above, if any
      keystore-password = null

      # If true, operations that set or modify data, like /ingest, are not available
      read-only = false

      # An optional prefix for the path under which the service is deployed. For
      # example, set to "/contextPath" to expose services at paths like "http://example.org/contextPath/..."
      context-path = "/"
    }

    # Where to load application JAX-RS resources (one or more comma-separated Java package names)
    application-resources = null

    # Implementation of com.cloudera.oryx.api.serving.ServingModelManager interface
    # that produces a ServingModel from stream of updates
    model-manager-class = null

    # Don't consider a model loaded and ready to use until this fraction of it is loaded.
    # Some models load incrementally (e.g. ALS). Others load all at once, in which case this
    # has no effect.
    min-model-load-fraction = 0.8

    # Test-only option; don't set this in general
    no-init-topics = false
  }

  # ML tier configuration
  ml = {
    # Model evaluation settings
    eval = {
      # Fraction of current data that is used for test, versus training
      test-fraction = 0.1
      # Increase to build more candidate models per run, and pick the best one
      candidates = 1
      # Number of models to build in parallel
      parallelism = 1
    }
  }
}
```
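Because this reference file supplies a default for every key, a deployment normally writes only a small HOCON override file containing the values that differ and supplies it to each layer at startup (for example via the standard Typesafe Config `-Dconfig.file` mechanism); anything not overridden falls back to the defaults above. Below is a minimal sketch of such an override file — the broker addresses, ZooKeeper hosts, HDFS paths, and implementation class names are hypothetical placeholders, not values shipped with Oryx:

```
# my-app.conf -- hypothetical override file; any key not set here keeps the default above
kafka-brokers = "b1.example.com:9092,b2.example.com:9092"
zk-servers    = "zk1.example.com:2181"

oryx = {
  id = "MyOryxApp"

  input-topic = {
    broker = ${kafka-brokers}
    lock.master = ${zk-servers}
  }
  update-topic = {
    broker = ${kafka-brokers}
    lock.master = ${zk-servers}
  }

  batch = {
    # Placeholder class name: an implementation of com.cloudera.oryx.api.batch.BatchLayerUpdate
    update-class = "com.example.oryx.MyBatchUpdate"
    streaming.generation-interval-sec = 3600
    storage = {
      data-dir  = "hdfs:///user/example/Oryx/data/"
      model-dir = "hdfs:///user/example/Oryx/model/"
    }
  }

  # Placeholder class names: implementations of the Speed/Serving model manager interfaces
  speed.model-manager-class   = "com.example.oryx.MySpeedModelManager"
  serving.model-manager-class = "com.example.oryx.MyServingModelManager"
  serving.api.port = 8080
}
```

The same HOCON substitution and merge rules explain the `config = ${oryx.default-streaming-config}` lines in the defaults: references are resolved only after all configuration sources are merged, so an override file can extend rather than replace the shared Spark settings, for example:

```
oryx.batch.streaming.config = ${oryx.default-streaming-config} {
  # Extra Spark properties applied to the Batch layer only (illustrative value)
  spark.default.parallelism = 48
}
```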