# Install any missing packages, then attach the ones this script uses.
# Installing only when a package is absent avoids re-downloading on every run.
required_pkgs <- c("gridBase", "arules", "arulesViz", "graphlayouts")
for (pkg in required_pkgs) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(gridBase)
library(arules)
library(arulesViz)

# Existing purchase records: one character vector of items per basket.
# Five example shopping baskets, each listing the items bought together.
tr_list <- list(
  c("Bread", "Milk"),
  c("Bread", "Diaper", "Beer", "Eggs"),
  c("Milk", "Diaper", "Beer", "Coke"),
  c("Bread", "Milk", "Diaper", "Beer"),
  c("Bread", "Milk", "Diaper", "Coke")
)

# Name the baskets tr1, tr2, ...
names(tr_list) <- paste0("tr", seq_along(tr_list))
tr_list

# Use as() to coerce the list into the "transactions" class.
trans <- as(tr_list, "transactions")
trans
summary(trans)

# ---- Display the transactions ----
# Use LIST() on the transactions object.
LIST(trans)

# Inspect the data
inspect(trans)

# Visually check the transaction data with image()
image(trans)

# Look directly at the slots of the transactions object.
trans@data
trans@data@i
trans@data@p
trans@itemInfo                   # data.frame
trans@itemInfo$labels            # item labels
trans@itemsetInfo                # data.frame
trans@itemsetInfo$transactionID  # shopping cart transaction IDs

# Check the number of items in each basket
size(trans)

# ---- Filter transactions by size ----
# Keep only the baskets that hold at least three items.
filter_trans <- trans[size(trans) >= 3]
inspect(filter_trans)

# ---- Convert matrix-format data into transactions ----
# One 0/1 indicator vector per basket; positions follow the item columns
# named in dimnames() below.
tr1 <- c(0, 1, rep(0, 3), 1)
tr2 <- c(1, 1, 0, 1, 1, 0)
tr3 <- c(1, 0, 1, 1, 0, 1)
tr4 <- c(1, 1, 0, 1, 0, 1)
tr5 <- c(0, 1, 1, 1, 0, 1)

# Stack the vectors as rows: one row per basket, one column per item.
tr_matrix <- rbind(tr1, tr2, tr3, tr4, tr5)
dimnames(tr_matrix) <- list(
  paste0("tr", seq_len(nrow(tr_matrix))),
  # "Beer" was previously misspelled "Bear", which made these item labels
  # disagree with the list-based baskets used for `trans`.
  c("Beer", "Bread", "Coke", "Diaper", "Eggs", "Milk")
)
tr_matrix
trans2 <- as(tr_matrix, "transactions")
inspect(trans2)
trans2@data

# Convert data-frame-style (transaction ID, item) data into transactions
# Long format: one (transaction ID, item) pair per row.
trID <- c(rep(1, 2), rep(2:5, each = 4))
item <- c(
  "Bread", "Milk",
  "Bread", "Diaper", "Beer", "Eggs",
  "Milk", "Diaper", "Beer", "Coke",
  "Bread", "Milk", "Diaper", "Beer",
  "Bread", "Milk", "Diaper", "Coke"
)
tran <- cbind(trID, item)
tran

# Wrong practice: coercing the two-column (ID, item) matrix directly.
# The original call also named the class "transactionss", so as() raised an
# error and stopped the script; it is kept here commented out for reference.
# trans3 <- as(tran, "transactionss")

# Correct: split the items by transaction ID, then coerce the resulting list.
tr_df <- as.data.frame(tran)
tr_split <- split(tr_df[, "item"], tr_df[, "trID"])
trans3 <- as(tr_split, "transactions")

# Alternatively, build the data frame directly.
tr_dataf <- data.frame(
  trID = c(rep(1, 2), rep(2:5, each = 4)),
  item = c(
    "Bread", "Milk",
    "Bread", "Diaper", "Beer", "Eggs",
    "Milk", "Diaper", "Beer", "Coke",
    "Bread", "Milk", "Diaper", "Beer",
    "Bread", "Milk", "Diaper", "Coke"
  )
)
tr_dataf
trans4 <- as(split(tr_dataf[, "item"], tr_dataf[, "trID"]), "transactions")
trans4
inspect(trans4)
as.data.frame(tr_dataf[, 1])

library(openxlsx)

# Load data
readt <- read.xlsx("shoppingcart.xlsx")
readt

# View the data type of each column. This iterates over the data frame's
# columns instead of using apply(), which would first coerce the frame to a
# single-type matrix and so report the same class for every column.
vapply(readt, function(column) class(column)[1], character(1))

# Transform the data into a form that the apriori algorithm can process
# NOTE(review): the column is "ProID" here but "ProId" in the CSV call below;
# confirm both against the actual files.
trans5 <- as(split(readt[, "ProID"], readt[, "UserId"]), "transactions")
inspect(trans5)

# Read (user, product) pairs, one pair per row, straight into transactions.
read2 <- read.transactions(
  "shoppingcart2.csv",
  format = "single",
  sep = ",",
  cols = c("UserId", "ProId"),
  header = TRUE
)
inspect(read2)

# When the examples don't have a user ID, you must use format = "basket"
# (no name-based `cols` in that case):
# read2 <- read.transactions("shoppingcart2.csv", format = "basket", sep = ",", skip = 1)

# ---- Remove spaces (disabled experiment below) ----
# splitblank=function(x){
#   for (i in 1:length(read2[,2])) {
#
# a=unlist(strsplit(read2[,2][i],split=" "))
# b=which(a!="")
# c=paste(a[b],collapse = "")
# read2[,2][i]=c
#   }
#   read2[,2][2]
#
# }
#
# d=splitblank(read2[,2])
# read2
# e=vector()
# for (i in 1:length(read2[,2])){
# a=print(read2[,2][i])
# e=append(e,a)
# }
# e
# read2[,2]=e
# read2
# colnames(read2)[2]=e

# View the support of each item, as a proportion and as a count
itemFrequency(trans, type = "relative")
itemFrequency(trans, type = "absolute")

# Plot the item frequency (support) as a bar chart
itemFrequencyPlot(
  trans,
  col = c("orange", "yellow", "brown", "green", "tomato", "violet")
)

# Use eclat() to mine frequent itemsets
freqsets <- eclat(trans)
inspect(freqsets)

# You can also add parameter restrictions
frequentsets <- eclat(trans, parameter = list(support = 0.25, maxlen = 10))
summary(frequentsets)
inspect(sort(frequentsets, by = "support"))

# ---- Generate association rules ----
# With n items there are up to 2^n - 1 itemsets and up to
# 3^n - 2^(n + 1) + 1 rules.
rules <- apriori(
  trans,
  parameter = list(support = 0.25, confidence = 0.5, target = "rules")
)
inspect(rules)
summary(rules)

# Sort rules by confidence (highest first) and view them
rules.sorted <- sort(rules, by = "confidence", decreasing = TRUE)
rules.sorted
inspect(rules.sorted)

# Flag which of the sorted rules are redundant
redundant <- is.redundant(rules.sorted)
redundant

# Find redundant rules
rules.redundant <- rules.sorted[redundant]
inspect(rules.redundant)

# Drop redundant rules
rules.pruned <- rules.sorted[!redundant]
inspect(rules.pruned)

# Sort rules by lift
sortrules <- sort(rules, by = "lift")
inspect(sortrules)

# ---- Draw plots ----
library(arulesViz)
# Graph of the pruned rules, shaded by lift
plot(
  rules.pruned,
  measure = "confidence",
  method = "graph",
  control = list(type = "items"),
  shading = "lift"
)

# Interactive plot of all rules
plot(
  rules,
  measure = c("support", "lift"),
  shading = "confidence",
  interactive = TRUE
)

# View the rules for one item: "Milk" on the left-hand side
Milk_rule <- apriori(
  data = trans,
  parameter = list(support = 0.2, confidence = 0.5, minlen = 2),
  appearance = list(default = "rhs", lhs = "Milk")
)
inspect(Milk_rule)
plot(
  Milk_rule,
  by = "lift",
  main = "Milk_rule by lift",
  method = "graph",
  control = list(type = "items")
)

# Draw a balloon diagram of association rules; several rule sets can be
# combined with c() and drawn together
plot(c(rules.pruned, Milk_rule), main = "Milk_rules by grouped")
plot(
  c(rules.pruned, Milk_rule),
  method = "grouped",
  main = "Milk_rules by grouped"
)

# Use the Apriori algorithm to generate rules with "Milk" on the
# right-hand side
Rhs_Milk <- apriori(
  data = trans,
  parameter = list(support = 0.2, confidence = 0.5, minlen = 2),
  appearance = list(default = "lhs", rhs = "Milk")
)
inspect(Rhs_Milk)

# Keep only the non-redundant right-hand-side Milk rules
redundant1 <- is.redundant(Rhs_Milk)
Rhr <- Rhs_Milk[!redundant1]
inspect(Rhr)

# Install wordcloud2 only when it is missing, then attach it.
if (!requireNamespace("wordcloud2", quietly = TRUE)) {
  install.packages("wordcloud2")
}
library(wordcloud2)

44 R关联分析——Apriori算法相关推荐

  1. 关联分析(Apriori算法) 面包 牛奶 尿布 啤酒 ...

    关联分析时,需要处理两个关键问题:1. 从大量数据集中发现模式,计算代价高;2. 某些模式可能是虚假的,因为它们是偶然发生的。关联分析例题:从这个商品记录得出顾客喜欢同时购买哪几样东西 TID 面包 牛奶 ...

  2. 数据挖掘之关联分析Apriori算法

    文章目录 一.理论知识 1.1.定义 1.2.关联规则 1.3.频繁项集的产生 二.python实战 一.理论知识 许多商业企业在运营中积累了大量的数据.例如:普通超市的收银台每天都会收集到大量的用户 ...

  3. 关联分析——Apriori算法

    Apriori 算法详解 当我们在百度搜索里输入一个单词或单词一部分的时候,搜索引擎会自动补全查询词项,比如:输入"机器",百度下拉词项中就会出现"机器人编程" ...

  4. 【机器学习】关联分析Apriori算法详解以及代码实现

    Apriori算法以及统计学基础 什么是关联分析 简单的统计学基础 Apriori输出频繁集 从频繁项集中挖掘关联规则 什么是关联分析 从大规模数据集中寻找物品间的隐含关系被称作关联分析.而寻找物品的 ...

  5. 挖掘频繁模式、关联和Apriori算法

    挖掘频繁模式、关联和Apriori算法 1. 引入 1.1 基本概念 频繁模式:频繁出现在数据集中的模式 频繁模式挖掘:获取到给定数据集中反复出现的联系 注:模式其实可以理解为,你在淘宝购物,你的购物 ...

  6. R语言使用apriori算法进行关联规则挖掘实战:关联规则概念、频繁项集、支持度(support)、置信度(confidence)、提升度(lift)、apriori算法

    R语言使用apriori算法进行关联规则挖掘实战:关联规则概念、频繁项集、支持度(support)、置信度(confidence)、提升度(lift)、apriori算法 目录

  7. apriori算法_挖掘频繁模式、关联和Apriori算法

    挖掘频繁模式、关联和Apriori算法 1. 引入 1.1 基本概念 频繁模式:频繁出现在数据集中的模式 频繁模式挖掘:获取到给定数据集中反复出现的联系 注:模式其实可以理解为,你在淘宝购物,你的购物 ...

  8. 关联分析Apriori算法和FP-growth算法初探

    1. 关联分析是什么? Apriori和FP-growth算法是一种关联算法,属于无监督算法的一种,它们可以自动从数据中挖掘出潜在的关联关系.例如经典的啤酒与尿布的故事.下面我们用一个例子来切入本文对 ...

  9. 无监督学习-关联分析apriori原理与python代码

    关联分析是一种无监督学习,它的目标就是从大数据中找出那些经常一起出现的东西,不管是商品还是其他什么 item,然后靠这些结果总结出关联规则以用于后续的商业目的或者其他项目需求. 关联分析原理 那么这里 ...

最新文章

  1. VS2017 性能优化方法
  2. [第一财经周刊] 疯狂的团购
  3. Windows网络接口API函数
  4. C/C++ _strlwr_s 函数 – 字符串大写转小写- C语言零基础入门教程
  5. Git异常:fatal: could not create work tree dir 'XXX': No such file or directory
  6. Datatable转换为Json
  7. jdk jre jvm的关系
  8. Nordic Collegiate Programming Contest 2016
  9. C# 如何检测UDP是否被占用_如何悄悄检测微信好友是否被删除?
  10. html5在线加密,HTML技巧——加密网页
  11. OpenSSL文档阅读笔记-RSA Encryption Decryption Example with OpenSSL in C
  12. python读取excel单元格填充色rgb_Python openpyxl读取单元格字体颜色过程解析
  13. C/C++:无法打开.obj文件
  14. Java:实现GrahamScan凸包问题算法(附完整源码)
  15. 一切都好,只是很想念
  16. 2019中国互联网300强
  17. nico和niconiconi dp详解
  18. opencv2 设置摄像头参数问题
  19. mini车f和r的区别_MINI 是一种怎样的车?
  20. 强大的web电子表格控件dhtmlxSpreadsheet免费下载地址

热门文章

  1. 什么是元器件二筛,为何要二筛,如何二筛,二筛的要求与分级要点总结
  2. 学习笔记:Linux入门到进阶
  3. 计算机二级mysql_全国计算机二级MySQL试题(总)
  4. There is insufficient memory for the Java Runtime Environment to continue. 使用虚拟内存
  5. chatgpt报错 Something went wrong 解决方法
  6. Chrome Apps將是Google送給微軟的特洛伊木馬?
  7. 使用java代码实现证件照换背景色
  8. python调用excel的宏_在 Excel 中使用 Python 开发宏脚本
  9. C#学习基本概念之关键字--abstract与sealed
  10. pandas读取csv、txt和xlsx文件