286
правок
Изменения
м
<pre> install.packages(<font color="green">"packageName"</font>) require(<font color="green">"packageName"</font>)</pre>
<pre> library(<font color="green">"packageName"</font>)</pre>
<pre>
# Read the input data.
# Fix: header = TRUE is required here. With header = FALSE the columns are
# auto-named V1, V2, ..., so data$x and data$y below would be NULL and the
# lm() call would fail.
data <- read.csv("input.csv", sep = ',', header = TRUE)
# Fit a simple linear regression of x on y.
model <- lm(data$x ~ data$y)
# Print coefficients, residuals, R^2 and p-values.
print(summary(model))
<pre>
# Read the input data.
# Fix: header = TRUE is required. The formula below refers to columns by
# name (target, x, y, z); with header = FALSE the columns would be
# auto-named V1, V2, ... and lm() would fail to find them.
rdata <- read.csv("input.csv", sep = ',', header = TRUE)
# Fit a multiple linear regression of target on x, y and z.
model <- lm(target ~ x + y + z, data = rdata)
<pre>
# Load the h2o package and its dependencies, then start a local H2O cluster.
library(h2o)
h2o.init()
# Locate the example CSV file shipped with the h2o package.
path <- system.file("extdata", "data.csv", package = "h2o")
# Upload the file into the H2O cluster.
# Fix: the original passed `path = data`, but `data` is undefined at this
# point; the file path computed above is what must be passed.
data <- h2o.uploadFile(path = path)
<pre>
# Load the e1071 package and its dependencies.
library(e1071)
# Read the raw input data (no header row: columns get auto-generated
# names V1, V2, ...).
data <- read.csv(file = "input.csv", sep = ',', header = FALSE)
<pre>
# Load the caret package and its dependencies.
library(caret)
# Read the input data.
# Fix: header = TRUE is required, because the code below refers to the
# columns by name (target, x, y, z).
data <- read.csv("input.csv", sep = ',', header = TRUE)
# Split the data into train (80%) and test (20%) sets, stratified on target.
index <- createDataPartition(y = data$target, p = 0.8, list = FALSE)
training <- data[index,]
testing <- data[-index,]
# Fit an RBF-kernel SVM, tuned with 10-fold cross-validation repeated 3 times.
# Fix: the original passed `data = train_flats`, an undefined name; the
# training split created above is what must be used.
fit <- train(target ~ x + y + z,
data = training,
method = "svmRadial",
trControl = trainControl(method = "repeatedcv", number = 10, repeats = 3))
<pre>
# Install and attach the mlr package.
install.packages("mlr")
library(mlr)
# Read the training and test data sets from disk.
train <- read.csv(file = "input.csv")
test <- read.csv(file = "testInput.csv")
# Show the tunable hyper-parameters of the GBM classifier.
getParamSet("classif.gbm")
# Create a GBM base learner that returns class labels directly.
baseLearner <- makeLearner("classif.gbm", predict.type = "response")
Подсветка синтаксиса
Язык постоянно расширяется за счёт новых библиотек (пакетов). Для импорта одного пакета необходимо прописать в файле следующие строки:
Для того чтобы импортировать пакет с его зависимостями в код следует включить следующие строки:
== Описание известных пакетов ==
==== Линейная регрессия ====
{{Main|Линейная регрессия|ll=Линейная регрессия}}
<pre><font color="gray"># reading data</font> data <- read.csv(<font color="green">"input.csv"</font>, <font color="#660099">sep</font> = <font color="green">','</font>, <font color="#660099">header</font> = FALSE) <font color="gray"># evaluating linear regression model</font> model <- lm(data$<strong><font color="#660E7A">x</font></strong> ~ data$<strong><font color="#660E7A">y</font></strong>) <font color="gray"># getting summary</font> print(summary(model)) <font color="gray">#visualizing data </font> plot(data$<strong><font color="#660E7A">y</font></strong>, data$<strong><font color="#660E7A">x</font></strong>) lines(data$<strong><font color="#660E7A">y</font></strong>, predict(fit), <font color="#660099">col </font> = <font color="green">'red'</font>)</pre>
==== Множественная регрессия ====
<font color="gray"># reading data</font> rdata <- read.csv(<font color="green">"input.csv"</font>, <font color="#660099">sep</font> = <font color="green">','</font>, <font color="#660099">header</font> = FALSE) <font color="gray"># evaluating regression model</font> model <- lm(target ~ x + y + z, <font color="#660099">data</font> = rdata) <font color="gray">#getting summary </font> print(summary(model))</pre>
==== Логистическая регрессия ====
Логистическая регрессия – это модель регрессии, в которой переменная ответа принимает значения 0 или 1 (True или False). Реализация на языке <code>R</code> представлена в следующем фрагменте:
<pre><font color="gray">#$$reading data</font> rdata <- read.csv(<font color="green">"input.csv"</font>, <font color="#660099">sep </font> = <font color= "green">','</font>, <font color="#660099">header </font> = FALSE) <font color="gray">#evaluating model</font> model = glm(<font color="#660099">formula </font> = target ~ x + y + z, <font color="#660099">data </font> = rdata, <font color="#660099">family </font> = binomial) <font color="gray">#printing summary</font> print(summary(model))</pre>
=== PCA ===
{{Main|Метод главных компонент (PCA)|ll=PCA}}
<pre><font color="gray"># importing library and its dependencies</font> library(h2o) h2o.init() path <- system.file(<font color="green">"extdata"</font>, <font color="green">"data.csv"</font>, <font color="#660099">package</font> = <font color="green">"h2o"</font>) data <- h2o.uploadFile(<font color="#660099">path</font> = path) <font color="gray">#evaluating</font> h2o.prcomp(<font color="#660099">training_frame </font> = data, <font color="#660099">k </font> = <font color="blue">8</font>, <font color="#660099">transform </font> = <font color="green">"STANDARDIZE"</font>)</pre>
=== Деревья решений, случайный лес ===
Для создания ''[[Дерево решений и случайный лес |деревьев решений]]'' в <code>R</code> используется функция <code>ctree()</code> из пакета <code>party</code>.
<pre><font color="gray">#importing package </font> install.packages(<font color="green">"party"</font>) <font color="gray">#reading data</font> rdata <- read.csv(<font color="green">"input.csv"</font>, <font color="#660099">sep </font> = <font color="green">','</font>, <font color="#660099">header </font> = FALSE) <font color="gray">#evaluating model</font> output.tree <- ctree(target ~ x + y + z, <font color="#660099">data </font> = rdata) <font color="gray">#plotting results</font> plot(output.tree)</pre>
==== Случайный лес ====
Для создания ''[[Дерево решений и случайный лес|случайного леса]]'' необходимо импортировать пакет <code>randomForest</code>
<pre><font color="gray">#importing packages </font> install.packages(<font color="green">"party"</font>) install.packages(<font color="green">"randomForest"</font>) <font color="gray">#reading data</font> rdata <- read.csv(<font color="green">"input.csv"</font>, <font color="#660099">sep </font> = <font color="green">','</font>, <font color="#660099">header </font> = FALSE) <font color="gray">#creating the forest</font> output.forest <- randomForest(target ~ x + y + z, <font color="#660099">data </font> = rdata) <font color="gray">#getting results</font> print(output.forest) </pre>
=== Наивный Бейесовский классификатор ===
{{Main|Байесовская классификация|ll=Байесовская классификация}}
<pre><font color="gray"># importing package and its dependencies</font> library(e1071) <font color="gray"># reading data</font> data <- read.csv(<font color="green">"input.csv"</font>, <font color="#660099">sep</font> = <font color="green">','</font>, <font color="#660099">header</font> = FALSE) <font color="gray">#splitting data into training and test data sets</font> index <- createDataPartition(<font color="#660099">y </font> = data$<strong><font color="#660E7A">target</font></strong>, <font color="#660099">p </font> = <font color= "blue">0.8</font>,<font color="#660099">list </font> = FALSE) training <- data[index,] testing <- data[-index,] <font color="gray">#create objects x and y for predictor and response variables</font> x <- training[,-<font color="blue">9</font>] y <- training$<strong><font color="#660E7A">target</font></strong> <font color="gray">#training model</font> model <- train(x,y,<font color="green">'nb'</font>,<font color="#660099">trControl</font> =trainControl(<font color="#660099">method</font> = <font color="green">'cv'</font>,<font color="#660099">number</font> = <font color="blue">10</font>)) <font color="gray">#predicting results</font> predictions <- predict(model, <font color="#660099">newdata </font> = testing)</pre>
=== SVM ===
{{Main|Метод опорных векторов (SVM)|ll=SVM}}
<pre><font color="gray"># importing package and its dependencies</font> library(caret) <font color="gray">#reading data</font> data <- read.csv(<font color="green">"input.csv"</font>, <font color="#660099">sep</font> = <font color="green">','</font>, <font color="#660099">header</font> = FALSE) <font color="gray">#splitting data into train and test sets</font> index <- createDataPartition(<font color="#660099">y</font> = data$<strong><font color="#660E7A">target</font></strong>, <font color="#660099">p</font> = <font color="blue">0.8</font>, <font color="#660099">list</font> = FALSE) training <- data[index,] testing <- data[-index,] <font color="gray"># evaluating model</font> fit <- train(target ~ x + y + z, <font color="#660099">data</font> = training, <font color="#660099">method</font> = <font color="green">"svmRadial"</font>, <font color="#660099">trControl</font> = trainControl(<font color="#660099">method</font> = <font color="green">"repeatedcv"</font>, <font color="#660099">number</font> = <font color="blue">10</font>, <font color="#660099">repeats</font> = <font color="blue">3</font>)) <font color="gray"># printing parameters</font> print(fit)</pre>
=== GBM ===
{{Main|Бустинг, AdaBoost|ll=Бустинг}}
<font color="gray"># loading libraries</font> install.packages(<font color="green">"mlr"</font>) library(mlr) <font color="gray"># loading data</font> train <- read.csv(<font color="green">"input.csv"</font>) test <- read.csv(<font color="green">"testInput.csv"</font>) <font color="gray"># loading GBM</font> getParamSet(<font color="green">"classif.gbm"</font>) baseLearner <- makeLearner(<font color="green">"classif.gbm"</font>, <font color="#660099">predict.type</font> = <font color="green">"response"</font>) <font color="gray">#specifying parameters</font> controlFunction <- makeTuneControlRandom(<font color="#660099">maxit </font> = <font color= "blue">50000</font>)<font color="gray">#specifying tuning method</font> cvFunction <- makeResampleDesc(<font color="green">"CV"</font>,<font color="#660099">iters </font> = <font color= "blue">100000</font>) <font color="gray">#definig cross-validation function</font> gbmParameters<- makeParamSet( makeDiscreteParam(<font color="green">"distribution"</font>, <font color="#660099">values </font> = <font color= "green">"bernoulli"</font>), makeIntegerParam(<font color="green">"n.trees"</font>, <font color="#660099">lower </font> = <font color= "blue">100</font>, <font color="#660099">upper </font> = <font color="blue">1000</font>), <font color="gray">#number of trees</font> makeIntegerParam(<font color="green">"interaction.depth"</font>, <font color="#660099">lower </font> = <font color= "blue">2</font>, <font color="#660099">upper </font> = <font color= "blue">10</font>), <font color="gray">#depth of tree</font> makeIntegerParam(<font color="green">"n.minobsinnode"</font>, <font color="#660099">lower </font> = <font color= "blue">10</font>, <font color="#660099">upper </font> = <font color= "blue">80</font>), makeNumericParam(<font color="green">"shrinkage"</font>,<font color="#660099">lower </font> = <font color= "blue">0.01</font>, <font color="#660099">upper </font> = <font color="blue">1</font>) ) <font 
color="gray">#tunning parameters</font> gbmTuningParameters <- tuneParams(<font color="#660099">learner </font> = baseLearner, <font color="#660099">task </font> = trainTask, <font color="#660099">resampling </font> = cvFunction, <font color="#660099">measures </font> = acc, <font color="#660099">par.set </font> = gbmParameters, <font color="#660099">control </font> = controlFunction) <font color="gray">#creating model parameters</font> model <- setHyperPars(<font color="#660099">learner </font> = baseLearner, <font color="#660099">par.vals </font> = gbmTuningParameters) <font color="gray">#evaluating model</font> fit <- train(model, train) predictions <- predict(fit, test)</pre>
=== Кластеризация ===
Для реализации алгоритма кластеризации ''k-средних'' используется пакет <code>ClusterR</code>. В нем реализовано 2 функции: <code>KMeans_arma()</code> и <code>KMeans_rcpp()</code>. В примере далее рассмотрена реализация с использованием функции <code>KMeans_arma()</code>.
<pre><font color="gray">#$$ importing package and its dependencies</font> library(ClusterR) <font color="gray">#reading data</font> data <- read.csv(<font color="green">"data.csv"</font>) <font color="gray">#evaluating model</font> model <- KMeans_arma(data, <font color="#660099">clusters </font> = <font color= "blue">2</font>, <font color="#660099">n_iter </font> = <font color="blue">10</font>, <font color="#660099">seed_mode </font> = <font color= "green">"random_subset"</font>, <font color="#660099">verbose </font> = T, <font color="#660099">CENTROIDS </font> = NULL) <font color="gray">#predicting results</font> predictions <- predict_KMeans(test_data, model) </pre>
==См. также==