<!DOCTYPE html>
<html lang="zh-Hans">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
    <meta name="renderer" content="webkit">
    <meta name="viewport" content="width=device-width,initial-scale=1,maximum-scale=5">
    <title>机器学习 贝叶斯分类详解大数据 | 云图网</title>
    <meta name="keywords" content="分布式文件系统，分布式数据库,并行处理（MPP）数据库，数据挖掘,开源大数据平台,数据中台,数据分析,数据开发,数据治理,数据湖,数据采集">
<meta name="description" content="一：贝叶斯分类介绍 贝叶斯分类器是一个统计分类器。它们能够预测类别所属的概率，如：一个数据对象属于某个类别的概率。贝叶斯分类器是基于贝叶斯定理而构造出来的。对分类方法进行比较的有关研究结果表明：简单贝叶斯分类器（称为基本贝叶斯分类器）在分类性能上与决策树和神经网络都是可比的。在处理大规模数据库时，贝叶斯分类器已表现出较…">
<meta property="og:type" content="article">
<meta property="og:url" content="https://blog.ytso.com/tech/bigdata/9386.html">
<meta property="og:site_name" content="云图网">
<meta property="og:title" content="机器学习 贝叶斯分类详解大数据">
<meta property="og:image" content="https://img-blog.csdn.net/20170209091845471">
<meta property="og:description" content="一：贝叶斯分类介绍 贝叶斯分类器是一个统计分类器。它们能够预测类别所属的概率，如：一个数据对象属于某个类别的概率。贝叶斯分类器是基于贝叶斯定理而构造出来的。对分类方法进行比较的有关研究结果表明：简单贝叶斯分类器（称为基本贝叶斯分类器）在分类性能上与决策树和神经网络都是可比的。在处理大规模数据库时，贝叶斯分类器已表现出较…">
<link rel="canonical" href="https://blog.ytso.com/tech/bigdata/9386.html">
<meta name="applicable-device" content="pc,mobile">
<meta http-equiv="Cache-Control" content="no-transform">
<link rel="shortcut icon" href="https://imgcdn.ytso.com/wp-content/uploads/2024/10/20241015181503433.jpg">
<link rel='dns-prefetch' href='//cdnjs.cloudflare.com' />
<style id='wp-img-auto-sizes-contain-inline-css' type='text/css'>
img:is([sizes=auto i],[sizes^="auto," i]){contain-intrinsic-size:3000px 1500px}
/*# sourceURL=wp-img-auto-sizes-contain-inline-css */
</style>
<link rel='stylesheet' id='stylesheet-css' href='https://blog.ytso.com/wp-content/themes/justnews/css/style.css?ver=6.21.5' type='text/css' media='all' />
<link rel='stylesheet' id='material-icons-css' href='https://blog.ytso.com/wp-content/themes/justnews/themer/assets/css/material-icons.css?ver=6.21.5' type='text/css' media='all' />
<link rel='stylesheet' id='remixicon-css' href='https://cdnjs.cloudflare.com/ajax/libs/remixicon/4.2.0/remixicon.min.css?ver=6.9.4' type='text/css' media='all' />
<link rel='stylesheet' id='font-awesome-css' href='https://blog.ytso.com/wp-content/themes/justnews/themer/assets/css/font-awesome.css?ver=6.21.5' type='text/css' media='all' />
<style id='wp-block-library-inline-css' type='text/css'>
:root{--wp-block-synced-color:#7a00df;--wp-block-synced-color--rgb:122,0,223;--wp-bound-block-color:var(--wp-block-synced-color);--wp-editor-canvas-background:#ddd;--wp-admin-theme-color:#007cba;--wp-admin-theme-color--rgb:0,124,186;--wp-admin-theme-color-darker-10:#006ba1;--wp-admin-theme-color-darker-10--rgb:0,107,160.5;--wp-admin-theme-color-darker-20:#005a87;--wp-admin-theme-color-darker-20--rgb:0,90,135;--wp-admin-border-width-focus:2px}@media (min-resolution:192dpi){:root{--wp-admin-border-width-focus:1.5px}}.wp-element-button{cursor:pointer}:root .has-very-light-gray-background-color{background-color:#eee}:root .has-very-dark-gray-background-color{background-color:#313131}:root .has-very-light-gray-color{color:#eee}:root .has-very-dark-gray-color{color:#313131}:root .has-vivid-green-cyan-to-vivid-cyan-blue-gradient-background{background:linear-gradient(135deg,#00d084,#0693e3)}:root .has-purple-crush-gradient-background{background:linear-gradient(135deg,#34e2e4,#4721fb 50%,#ab1dfe)}:root .has-hazy-dawn-gradient-background{background:linear-gradient(135deg,#faaca8,#dad0ec)}:root .has-subdued-olive-gradient-background{background:linear-gradient(135deg,#fafae1,#67a671)}:root .has-atomic-cream-gradient-background{background:linear-gradient(135deg,#fdd79a,#004a59)}:root .has-nightshade-gradient-background{background:linear-gradient(135deg,#330968,#31cdcf)}:root 
.has-midnight-gradient-background{background:linear-gradient(135deg,#020381,#2874fc)}:root{--wp--preset--font-size--normal:16px;--wp--preset--font-size--huge:42px}.has-regular-font-size{font-size:1em}.has-larger-font-size{font-size:2.625em}.has-normal-font-size{font-size:var(--wp--preset--font-size--normal)}.has-huge-font-size{font-size:var(--wp--preset--font-size--huge)}.has-text-align-center{text-align:center}.has-text-align-left{text-align:left}.has-text-align-right{text-align:right}.has-fit-text{white-space:nowrap!important}#end-resizable-editor-section{display:none}.aligncenter{clear:both}.items-justified-left{justify-content:flex-start}.items-justified-center{justify-content:center}.items-justified-right{justify-content:flex-end}.items-justified-space-between{justify-content:space-between}.screen-reader-text{border:0;clip-path:inset(50%);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px;word-wrap:normal!important}.screen-reader-text:focus{background-color:#ddd;clip-path:none;color:#444;display:block;font-size:1em;height:auto;left:5px;line-height:normal;padding:15px 23px 14px;text-decoration:none;top:5px;width:auto;z-index:100000}html :where(.has-border-color){border-style:solid}html :where([style*=border-top-color]){border-top-style:solid}html :where([style*=border-right-color]){border-right-style:solid}html :where([style*=border-bottom-color]){border-bottom-style:solid}html :where([style*=border-left-color]){border-left-style:solid}html :where([style*=border-width]){border-style:solid}html :where([style*=border-top-width]){border-top-style:solid}html :where([style*=border-right-width]){border-right-style:solid}html :where([style*=border-bottom-width]){border-bottom-style:solid}html :where([style*=border-left-width]){border-left-style:solid}html :where(img[class*=wp-image-]){height:auto;max-width:100%}:where(figure){margin:0 0 1em}html 
:where(.is-position-sticky){--wp-admin--admin-bar--position-offset:var(--wp-admin--admin-bar--height,0px)}@media screen and (max-width:600px){html :where(.is-position-sticky){--wp-admin--admin-bar--position-offset:0px}}
/*wp_block_styles_on_demand_placeholder:69d08af6a7a31*/
/*# sourceURL=wp-block-library-inline-css */
</style>
<style id='classic-theme-styles-inline-css' type='text/css'>
/*! This file is auto-generated */
.wp-block-button__link{color:#fff;background-color:#32373c;border-radius:9999px;box-shadow:none;text-decoration:none;padding:calc(.667em + 2px) calc(1.333em + 2px);font-size:1.125em}.wp-block-file__button{background:#32373c;color:#fff;text-decoration:none}
/*# sourceURL=/wp-includes/css/classic-themes.min.css */
</style>
<link rel='stylesheet' id='wpcom-member-css' href='https://blog.ytso.com/wp-content/plugins/wpcom-member/css/style.css?ver=1.7.19' type='text/css' media='all' />
<script type="text/javascript" src="https://blog.ytso.com/wp-includes/js/jquery/jquery.min.js?ver=3.7.1" id="jquery-core-js"></script>
<script type="text/javascript" src="https://blog.ytso.com/wp-includes/js/jquery/jquery-migrate.min.js?ver=3.4.1" id="jquery-migrate-js"></script>
<link rel="EditURI" type="application/rsd+xml" title="RSD" href="https://blog.ytso.com/xmlrpc.php?rsd" />
<meta name='description' content='一：贝叶斯分类介绍 贝叶斯分类器是一个统计分类器。它们能够预测类别所属的概率，如：一个数据对象属于某个类别的概率。贝叶斯分类器是基于贝叶斯定理而构造出来的。对分类方法进行比较的有关研究结果表明：简单贝…' />
<meta name='keywords' content='分布式文件系统，分布式数据库,并行处理（MPP）数据库，数据挖掘,开源大数据平台,数据中台,数据分析,数据开发,数据治理,数据湖,数据采集' />
<style>:root{--theme-color: #08c; --theme-hover: #07c; --logo-height: 32px; --logo-height-mobile: 26px; --menu-item-gap: 28px; --member-login-bg: url('https://blog.ytso.com/loginwall.jpg'); --header-bg-color: #fff; --header-bg-image: none; --theme-border-radius-s: 3px; --theme-border-radius-m: 5px; --theme-border-radius-l: 8px; --theme-border-radius-xl: 12px; --thumb-ratio-default: 480 / 300; --thumb-ratio-post: 480 / 300; --post-video-ratio: 860 / 482;}</style>
<link rel="icon" href="https://imgcdn.ytso.com/wp-content/uploads/2024/10/20241015181503433.jpg" sizes="32x32" />
<link rel="icon" href="https://imgcdn.ytso.com/wp-content/uploads/2024/10/20241015181503433.jpg" sizes="192x192" />
<link rel="apple-touch-icon" href="https://imgcdn.ytso.com/wp-content/uploads/2024/10/20241015181503433.jpg" />
<meta name="msapplication-TileImage" content="https://imgcdn.ytso.com/wp-content/uploads/2024/10/20241015181503433.jpg" />
    <!--[if lte IE 11]><script src="https://blog.ytso.com/wp-content/themes/justnews/js/update.js"></script><![endif]-->
</head>
<body class="wp-singular post-template-default single single-post postid-9386 single-format-standard wp-theme-justnews lang-cn el-boxed header-fixed">
<header class="header header-fluid">
    <div class="container">
        <div class="navbar-header">
            <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target=".navbar-collapse" aria-label="menu">
                <span class="icon-bar icon-bar-1"></span>
                <span class="icon-bar icon-bar-2"></span>
                <span class="icon-bar icon-bar-3"></span>
            </button>
                        <div class="logo">
                <a href="https://blog.ytso.com/" rel="home">
                    <img src="https://imgcdn.ytso.com/wp-content/uploads/2021/12/20211207181156143.png" alt="云图网">
                </a>
            </div>
        </div>
        <div class="collapse navbar-collapse mobile-style-0">
            <nav class="primary-menu"><ul id="menu-justnews-footer-menu" class="nav navbar-nav wpcom-adv-menu"><li class="menu-item"><a href="https://blog.ytso.com/category/industrynews">业界资讯</a></li>
<li class="menu-item current-post-ancestor active menu-item-style menu-item-style-1 dropdown"><a target="_blank" href="https://blog.ytso.com/category/tech" class="dropdown-toggle">技术专区</a>
<ul class="dropdown-menu menu-item-wrap menu-item-col-5">
	<li class="menu-item"><a href="https://blog.ytso.com/category/tech/cloud">云计算</a></li>
	<li class="menu-item"><a href="https://blog.ytso.com/category/tech/ai">人工智能</a></li>
	<li class="menu-item"><a href="https://blog.ytso.com/category/tech/safety">信息安全</a></li>
	<li class="menu-item current-post-ancestor current-post-parent active"><a href="https://blog.ytso.com/category/tech/bigdata">大数据</a></li>
	<li class="menu-item"><a href="https://blog.ytso.com/category/tech/dev">研发管理</a></li>
	<li class="menu-item"><a href="https://blog.ytso.com/category/tech/webdev">大前端</a></li>
	<li class="menu-item"><a href="https://blog.ytso.com/category/tech/opensource">开源</a></li>
	<li class="menu-item"><a href="https://blog.ytso.com/category/tech/aiops">智能运维</a></li>
	<li class="menu-item"><a href="https://blog.ytso.com/category/tech/pnotes">编程笔记</a></li>
	<li class="menu-item"><a href="https://blog.ytso.com/category/tech/wp">WordPress</a></li>
</ul>
</li>
<li class="menu-item"><a href="https://blog.ytso.com/category/enterprise-strategic-planning">企业战略规划</a></li>
<li class="menu-item"><a href="https://blog.ytso.com/category/download">下载专区</a></li>
<li class="menu-item"><a href="https://blog.ytso.com/category/it%e6%b1%9f%e6%b9%96%e5%8f%b2">江湖史</a></li>
<li class="menu-item"><a href="https://blog.ytso.com/category/notes">随笔记录</a></li>
</ul></nav>            <div class="navbar-action">
                <div class="navbar-search-icon j-navbar-search"><i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-search"></use></svg></i></div><form class="navbar-search" action="https://blog.ytso.com/" method="get" role="search"><div class="navbar-search-inner"><i class="wpcom-icon wi navbar-search-close"><svg aria-hidden="true"><use xlink:href="#wi-close"></use></svg></i><input type="text" name="s" class="navbar-search-input" autocomplete="off" maxlength="100" placeholder="输入关键词搜索..." value="" aria-label="搜索关键词"><button class="navbar-search-btn" type="submit" aria-label="搜索"><i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-search"></use></svg></i></button></div></form>                    <div id="j-user-wrap">
                        <a class="login" href="https://blog.ytso.com/login">登录</a>
                        <a class="login register" href="https://blog.ytso.com/register">注册</a>
                    </div>
                                            <a class="wpcom-btn btn-primary btn-xs publish" href="https://blog.ytso.com/contribute">
                            <i class="fa fa-edit"></i> 投稿                        </a>
                                </div>
        </div>
    </div><!-- /.container -->
</header>

<div id="wrap">    <div class="wrap container">
        <ol class="breadcrumb" vocab="https://schema.org/" typeof="BreadcrumbList"><li class="home" property="itemListElement" typeof="ListItem"><a href="https://blog.ytso.com" property="item" typeof="WebPage"><span property="name" class="hide">云图网</span>首页</a><meta property="position" content="1"></li><li property="itemListElement" typeof="ListItem"><i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-arrow-right-3"></use></svg></i><a href="https://blog.ytso.com/category/tech" property="item" typeof="WebPage"><span property="name">技术专区</span></a><meta property="position" content="2"></li><li property="itemListElement" typeof="ListItem"><i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-arrow-right-3"></use></svg></i><a href="https://blog.ytso.com/category/tech/bigdata" property="item" typeof="WebPage"><span property="name">大数据</span></a><meta property="position" content="3"></li></ol>        <main class="main">
                            <article id="post-9386" class="post-9386 post type-post status-publish format-standard hentry category-bigdata tag-117 tag-mpp tag-121 tag-125 tag-119 tag-120 tag-123 tag-124 tag-122 entry">
                    <div class="entry-main">
                                                                        <div class="entry-head">
                            <h1 class="entry-title">机器学习 贝叶斯分类详解大数据</h1>
                            <div class="entry-info">
                                                                <time class="entry-date published" datetime="2021-07-19T09:31:15+08:00">
                                    2021年7月19日 09:31                                </time>
                                <span class="dot">•</span>
                                <a href="https://blog.ytso.com/category/tech/bigdata" rel="category tag">大数据</a>                                                            </div>
                        </div>
                        
                                                <div class="entry-content text-indent text-justify">
                            <p><span style="font-family:'Microsoft YaHei';font-size:14px;"><strong>一：贝叶斯分类介绍 </strong> <br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">贝叶斯分类器是一个统计分类器。它们能够预测类别所属的概率，如：一个数据对象属于某个类别的概率。贝叶斯分类器是基于贝叶斯定理而构造出来的。对分类方法进行比较的有关研究结果表明：简单贝叶斯分类器（称为基本贝叶斯分类器）在分类性能上与决策树和神经网络都是可比的。在处理大规模数据库时，贝叶斯分类器已表现出较高的分类准确性和运算性能。基本贝叶斯分类器假设一个指定类别中各属性的取值是相互独立的。这一假设也被称为：类别条件独立，它可以帮助有效减少在构造贝叶斯分类器时所需要进行的计算。</span></p>
<p><strong><span style="font-family:'Microsoft YaHei';font-size:14px;">二：贝叶斯定理</span></strong></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp;&nbsp;&nbsp; p(A|B) 条件概率 表示在B发生的前提下，A发生的概率；</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp;&nbsp;&nbsp; <noscript><img decoding="async" src="https://img-blog.csdn.net/20170209091845471" alt="机器学习 贝叶斯分类详解大数据"></noscript><img decoding="async" class="j-lazy" src="https://blog.ytso.com/wp-content/themes/justnews/themer/assets/images/lazy.png" data-original="https://img-blog.csdn.net/20170209091845471" alt="机器学习 贝叶斯分类详解大数据"><br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp; 基本贝叶斯分类器通常都假设在给定类别的条件下，各属性的取值是相互独立的。对于特定的类别 C，若属性 A、B 相互独立，就会有：<br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; P(AB|C) = P(A|C)*P(B|C)</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"><strong>三：贝叶斯分类案例</strong></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"><strong>&nbsp;&nbsp;&nbsp;&nbsp; 1：分类属性是离散<br /></strong></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"><strong>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; </strong> 假设有样本数为6个的训练集数字如下：</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <noscript><img decoding="async" src="https://img-blog.csdn.net/20170209093535386" alt="机器学习 贝叶斯分类详解大数据"></noscript><img decoding="async" class="j-lazy" src="https://blog.ytso.com/wp-content/themes/justnews/themer/assets/images/lazy.png" data-original="https://img-blog.csdn.net/20170209093535386" alt="机器学习 贝叶斯分类详解大数据"><br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">现在假设又来了一个症状为咳嗽的教师，那这位教师患上感冒、发烧、鼻炎的概率分别是多少呢？这个问题可以用贝叶斯分类来解决：三种疾病中哪个概率高，就把这个咳嗽的教师划为哪个类，实质就是分别求 P(感冒 | 咳嗽 * 教师)、P(发烧 | 咳嗽 * 教师) 和 P(鼻炎 | 咳嗽 * 教师) 的概率；<br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">假设各个属性在给定类别的条件下相互独立：</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"><noscript><img decoding="async" src="https://img-blog.csdn.net/20170209101036559" alt="机器学习 贝叶斯分类详解大数据"></noscript><img decoding="async" class="j-lazy" src="https://blog.ytso.com/wp-content/themes/justnews/themer/assets/images/lazy.png" data-original="https://img-blog.csdn.net/20170209101036559" alt="机器学习 贝叶斯分类详解大数据"><br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"><noscript><img decoding="async" src="https://img-blog.csdn.net/20170209103245206" alt="机器学习 贝叶斯分类详解大数据"></noscript><img decoding="async" class="j-lazy" src="https://blog.ytso.com/wp-content/themes/justnews/themer/assets/images/lazy.png" data-original="https://img-blog.csdn.net/20170209103245206" alt="机器学习 贝叶斯分类详解大数据"><br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;<noscript><img decoding="async" src="https://img-blog.csdn.net/20170209103309870" alt="机器学习 贝叶斯分类详解大数据"></noscript><img decoding="async" class="j-lazy" src="https://blog.ytso.com/wp-content/themes/justnews/themer/assets/images/lazy.png" data-original="https://img-blog.csdn.net/20170209103309870" alt="机器学习 贝叶斯分类详解大数据"><br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;P(感冒)=3/6&nbsp;&nbsp;&nbsp; P(发烧)=1/6&nbsp;&nbsp;&nbsp;&nbsp; P(鼻炎)=2/6</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;p(咳嗽) = 3/6&nbsp;&nbsp; P(教师) = 2/6</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;p(咳嗽 | 感冒) = 2/3&nbsp;&nbsp; P(教师 | 感冒) = 1/3</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">故<br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"><noscript><img decoding="async" src="https://img-blog.csdn.net/20170209102537656" alt="机器学习 贝叶斯分类详解大数据"></noscript><img decoding="async" class="j-lazy" src="https://blog.ytso.com/wp-content/themes/justnews/themer/assets/images/lazy.png" data-original="https://img-blog.csdn.net/20170209102537656" alt="机器学习 贝叶斯分类详解大数据"><br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp; 按以上方法可分别求&nbsp; P(发烧 | 咳嗽 × 教师) 和P(鼻炎 |<span style="font-family:'Microsoft YaHei';font-size:14px;">咳嗽 × 教师</span> )的概率；</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"><strong>&nbsp;&nbsp;&nbsp; 2：分类属性连续</strong><br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 如果按上面的样本上加一个年龄的属性；因为年龄是连续，不能采用离散变量的方法计算概率。而且由于样本太少，所以也无法分成区间计算；这时，可以假设感冒、发烧、鼻炎分类的年龄都是正态分布，通过样本计算出均值和方差，也就是得到正态分布的密度函数；</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <noscript><img decoding="async" src="https://img-blog.csdn.net/20170209105858437" alt="机器学习 贝叶斯分类详解大数据"></noscript><img decoding="async" class="j-lazy" src="https://blog.ytso.com/wp-content/themes/justnews/themer/assets/images/lazy.png" data-original="https://img-blog.csdn.net/20170209105858437" alt="机器学习 贝叶斯分类详解大数据"><br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp; 下面就以求P(年龄=15|感冒)下的概率为例说明：</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp; 第一：求在感冒类下的年龄平均值&nbsp; u=(15+48+12)/3=25</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp; 第二：求在感冒类下年龄的方差，代入下面公式可求：方差=266<br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"><noscript><img decoding="async" src="https://img-blog.csdn.net/20170209110850560" alt="机器学习 贝叶斯分类详解大数据"></noscript><img decoding="async" class="j-lazy" src="https://blog.ytso.com/wp-content/themes/justnews/themer/assets/images/lazy.png" data-original="https://img-blog.csdn.net/20170209110850560" alt="机器学习 贝叶斯分类详解大数据"><br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp; 第三：把年龄=15 代入如下正态分布公式，将参数代进去即可求得 P(年龄=15|感冒) 的概率</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <noscript><img decoding="async" src="https://img-blog.csdn.net/20170209111614742" alt="机器学习 贝叶斯分类详解大数据"></noscript><img decoding="async" class="j-lazy" src="https://blog.ytso.com/wp-content/themes/justnews/themer/assets/images/lazy.png" data-original="https://img-blog.csdn.net/20170209111614742" alt="机器学习 贝叶斯分类详解大数据"><br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">其他属性按离散方法可求；</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"><strong>四：概率值为0处理</strong><br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; 假设有这种情况出现：在训练集上感冒的元组有10个，其中有0个是孩子，有6个是学生，有4个是教师；此时分别求得</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp;&nbsp; &nbsp; P(孩子|感冒) = 0； P(学生|感冒) = 6/10； P(教师|感冒) = 4/10；出现了概率为0的现象。为了避免这个现象，在假设训练元组数量很大的前提下，可以使用拉普拉斯估计法，把每个类型的计数加1（分母相应变为 10+3=13），这样可求得各概率分别是</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; P(孩子|感冒) = 1/13； P(学生|感冒) = 7/13； P(教师|感冒) = 5/13<br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"><strong>&nbsp;五：垃圾邮件贝叶斯分类案例</strong></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"><strong>&nbsp;&nbsp;&nbsp;&nbsp;</strong>&nbsp;&nbsp; 1：准备训练集数据</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">假设 postingList 为六封邮件的内容，classVec=[0,1,0,1,0,1] 为邮件类型，设 1 为垃圾邮件 </span></p>
<pre class="prism-language-java">def loadDataSet():
    """Return the toy training corpus used by the spam-classification example.

    Returns:
        postingList: six tokenized messages, each a list of lowercase words.
        classVec: one label per message, in the same order (1 = spam, 0 = not spam).
    """
    # Tokens must be spelled identically everywhere they occur; a stray space
    # or a misspelling turns one word into several vocabulary entries and
    # splits its counts between them.
    postingList = [
        ['my', 'dog', 'has', 'flea', 'problems', 'help', 'please'],
        ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid'],
        ['my', 'dalmation', 'is', 'so', 'cute', 'i', 'love', 'him'],
        ['stop', 'posting', 'stupid', 'worthless', 'garbage'],
        ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him'],
        ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid', 'quit'],
    ]
    classVec = [0, 1, 0, 1, 0, 1]
    return postingList, classVec</pre>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">2：根据所有的邮件内容创建一个所有单词集合<strong>&nbsp; </strong></span></p>
<pre class="prism-language-java">def createVocabList(dataSet):
    """Collect every distinct word that occurs in any document of dataSet.

    The words are returned as a list. Because they pass through a set, the
    order of that list is not defined.
    """
    vocabulary = set()
    for tokens in dataSet:
        # Adding in place is equivalent to rebuilding the union each time.
        vocabulary.update(tokens)
    return list(vocabulary)</pre>
<p><strong></strong>测试后获取的所有不重复单词的集合如下：<br /><noscript><img decoding="async" src="https://img-blog.csdn.net/20170302134425444" alt="机器学习 贝叶斯分类详解大数据"></noscript><img decoding="async" class="j-lazy" src="https://blog.ytso.com/wp-content/themes/justnews/themer/assets/images/lazy.png" data-original="https://img-blog.csdn.net/20170302134425444" alt="机器学习 贝叶斯分类详解大数据"><br /><strong><br /></strong> </p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">3：根据第2步得到的所有不重复单词集合，对每个邮件内容向量化&nbsp; <br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"></span></p>
<pre class="prism-language-java">def bagOfWords2VecMN(vocabList, inputSet):
    """Turn one tokenized document into a bag-of-words count vector.

    Args:
        vocabList: list of vocabulary words; entry i of the result counts
            occurrences of vocabList[i].
        inputSet: iterable of words making up the document.

    Returns:
        A list of ints with len(vocabList) entries.
    """
    # Build the word -&gt; position table once instead of calling
    # vocabList.index(), a linear scan, for every word. setdefault keeps the
    # first position of a repeated vocabulary word, matching list.index().
    position = {}
    for i, word in enumerate(vocabList):
        position.setdefault(word, i)

    returnVec = [0] * len(vocabList)
    for word in inputSet:
        i = position.get(word)
        # A word that is not in the vocabulary has no slot in the vector;
        # skip it instead of raising ValueError.
        if i is not None:
            returnVec[i] += 1
    return returnVec</pre>
<p>测试后可得如下，打印内容为向量化的六个邮件内容</p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; </span><br /><span style="font-family:'Microsoft YaHei';font-size:14px;"><noscript><img decoding="async" src="https://img-blog.csdn.net/20170302135525762" alt="机器学习 贝叶斯分类详解大数据"></noscript><img decoding="async" class="j-lazy" src="https://blog.ytso.com/wp-content/themes/justnews/themer/assets/images/lazy.png" data-original="https://img-blog.csdn.net/20170302135525762" alt="机器学习 贝叶斯分类详解大数据"></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">4：训练模型，此时就是分别求 p(垃圾|文档) = p(垃圾)*p(文档|垃圾)/p(文档)</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"></span></p>
<pre class="prism-language-java">def trainNBO(trainMatrix, trainCategory):
    """Estimate naive Bayes parameters from word-count vectors.

    Args:
        trainMatrix: one word-count vector per training document, all of the
            same length.
        trainCategory: one 0/1 label per document, in the same order.

    Returns:
        (p0Vect, p1Vect, pAbusive): the per-word log-probabilities for class 0
        and for class 1, and the fraction of documents labelled 1.
    """
    numTrainDocs = len(trainMatrix)
    numWords = len(trainMatrix[0])
    # Prior probability of class 1: the share of documents labelled 1.
    pAbusive = sum(trainCategory) / float(numTrainDocs)
    # Start every word count at 1 (and each total at 2.0) so that a word
    # never seen in one class does not end up with probability 0.
    p0Num = np.ones(numWords)
    p1Num = np.ones(numWords)
    p0Denom = 2.0
    p1Denom = 2.0
    for i in range(numTrainDocs):
        if trainCategory[i] == 1:
            p1Num += trainMatrix[i]
            p1Denom += sum(trainMatrix[i])
        else:
            p0Num += trainMatrix[i]
            p0Denom += sum(trainMatrix[i])
    # Return logarithms of the per-word probabilities.
    p1Vect = np.log(p1Num / p1Denom)
    p0Vect = np.log(p0Num / p0Denom)
    return p0Vect, p1Vect, pAbusive</pre>
<p>对训练模型进行测试结果如下： </p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"><noscript><img decoding="async" src="https://img-blog.csdn.net/20170302142705456" alt="机器学习 贝叶斯分类详解大数据"></noscript><img decoding="async" class="j-lazy" src="https://blog.ytso.com/wp-content/themes/justnews/themer/assets/images/lazy.png" data-original="https://img-blog.csdn.net/20170302142705456" alt="机器学习 贝叶斯分类详解大数据"><br /></span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;">5：定义分类方法</span></p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"></span></p>
<pre class="prism-language-java">def classifyNB(vec2Classify, p0Vec, p1Vec, pClass1):
    """Pick the class with the larger log score for one document vector.

    Each score is the sum of vec2Classify multiplied element-wise by that
    class's vector, plus the log of the class prior (pClass1 for class 1,
    1.0 - pClass1 for class 0). Returns 1 only when the class-1 score is
    strictly greater; otherwise returns 0.
    """
    score1 = sum(vec2Classify * p1Vec) + math.log(pClass1)
    score0 = sum(vec2Classify * p0Vec) + math.log(1.0 - pClass1)
    return 1 if score1 &gt; score0 else 0
</pre>
<p>6：以上分类完成，下面就对其进行测试，测试方法如下： </p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"></span></p>
<pre class="prism-language-java">def testingNB():
    """Train on the sample corpus and print the predicted class of one test message."""
    listOPosts, listClasses = loadDataSet()
    myVocabList = createVocabList(listOPosts)
    # Vectorize every training message against the shared vocabulary.
    trainMat = [bagOfWords2VecMN(myVocabList, postinDoc) for postinDoc in listOPosts]
    p0V, p1V, pAb = trainNBO(trainMat, listClasses)
    testEntry = ['stupid', 'my', 'dalmation']
    # classifyNB multiplies this vector element-wise, so it must be an array.
    thisDoc = np.array(bagOfWords2VecMN(myVocabList, testEntry))
    # Build one string so the line prints the same text under Python 2 and
    # Python 3; the Python 2 print statement is a syntax error on Python 3.
    print('%s classified as %s' % (testEntry, classifyNB(thisDoc, p0V, p1V, pAb)))</pre>
<p>结果如下： </p>
<p><span style="font-family:'Microsoft YaHei';font-size:14px;"><noscript><img decoding="async" src="https://img-blog.csdn.net/20170302143038730" alt="机器学习 贝叶斯分类详解大数据"></noscript><img decoding="async" class="j-lazy" src="https://blog.ytso.com/wp-content/themes/justnews/themer/assets/images/lazy.png" data-original="https://img-blog.csdn.net/20170302143038730" alt="机器学习 贝叶斯分类详解大数据"><br /></span></p>
<div class="entry-readmore"><div class="entry-readmore-btn"></div></div>                                                        <div class="entry-copyright"><p>原创文章，作者：Maggie-Hunter，如若转载，请注明出处：https://blog.ytso.com/tech/bigdata/9386.html</p></div>                        </div>

                        <div class="entry-tag"><a href="https://blog.ytso.com/tag/%e5%88%86%e5%b8%83%e5%bc%8f%e6%96%87%e4%bb%b6%e7%b3%bb%e7%bb%9f%ef%bc%8c%e5%88%86%e5%b8%83%e5%bc%8f%e6%95%b0%e6%8d%ae%e5%ba%93" rel="tag">分布式文件系统，分布式数据库</a><a href="https://blog.ytso.com/tag/%e5%b9%b6%e8%a1%8c%e5%a4%84%e7%90%86%ef%bc%88mpp%ef%bc%89%e6%95%b0%e6%8d%ae%e5%ba%93%ef%bc%8c%e6%95%b0%e6%8d%ae%e6%8c%96%e6%8e%98" rel="tag">并行处理（MPP）数据库，数据挖掘</a><a href="https://blog.ytso.com/tag/%e5%bc%80%e6%ba%90%e5%a4%a7%e6%95%b0%e6%8d%ae%e5%b9%b3%e5%8f%b0" rel="tag">开源大数据平台</a><a href="https://blog.ytso.com/tag/%e6%95%b0%e6%8d%ae%e4%b8%ad%e5%8f%b0" rel="tag">数据中台</a><a href="https://blog.ytso.com/tag/%e6%95%b0%e6%8d%ae%e5%88%86%e6%9e%90" rel="tag">数据分析</a><a href="https://blog.ytso.com/tag/%e6%95%b0%e6%8d%ae%e5%bc%80%e5%8f%91" rel="tag">数据开发</a><a href="https://blog.ytso.com/tag/%e6%95%b0%e6%8d%ae%e6%b2%bb%e7%90%86" rel="tag">数据治理</a><a href="https://blog.ytso.com/tag/%e6%95%b0%e6%8d%ae%e6%b9%96" rel="tag">数据湖</a><a href="https://blog.ytso.com/tag/%e6%95%b0%e6%8d%ae%e9%87%87%e9%9b%86" rel="tag">数据采集</a></div>
                        <div class="entry-action">
                            <div class="btn-zan" data-id="9386"><i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-thumb-up-fill"></use></svg></i> 赞 <span class="entry-action-num">(0)</span></div>
                                                    </div>

                        <!-- Entry bar: star (j-heart) and comment counters, share links, and the #j-reading link.
                             Each data-share anchor contains only an aria-hidden SVG icon, so it carries an aria-label
                             to give assistive technology a name for the link.
                             NOTE(review): the href="#" and href="javascript:;" anchors are presumably wired up by theme
                             JavaScript keyed on these classes, ids and data-* attributes, so those attributes are left
                             untouched here. -->
                        <div class="entry-bar">
                            <div class="entry-bar-inner">
                                <div class="entry-bar-info entry-bar-info2">
                                    <div class="info-item meta">
                                        <a class="meta-item j-heart" href="javascript:;" data-id="9386"><i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-star"></use></svg></i> <span class="data">0</span></a>
                                        <a class="meta-item" href="#comments"><i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-comment"></use></svg></i> <span class="data">0</span></a>
                                    </div>
                                    <div class="info-item share">
                                        <a class="meta-item mobile j-mobile-share" href="javascript:;" data-id="9386" data-qrcode="https://blog.ytso.com/tech/bigdata/9386.html">
                                            <i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-share"></use></svg></i> 生成海报
                                        </a>
                                        <a class="meta-item wechat" data-share="wechat" target="_blank" rel="nofollow noopener noreferrer" href="#" aria-label="分享到微信">
                                            <i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-wechat"></use></svg></i>
                                        </a>
                                        <a class="meta-item weibo" data-share="weibo" target="_blank" rel="nofollow noopener noreferrer" href="#" aria-label="分享到微博">
                                            <i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-weibo"></use></svg></i>
                                        </a>
                                        <a class="meta-item qq" data-share="qq" target="_blank" rel="nofollow noopener noreferrer" href="#" aria-label="分享到 QQ">
                                            <i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-qq"></use></svg></i>
                                        </a>
                                        <a class="meta-item qzone" data-share="qzone" target="_blank" rel="nofollow noopener noreferrer" href="#" aria-label="分享到 QQ 空间">
                                            <i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-qzone"></use></svg></i>
                                        </a>
                                        <a class="meta-item douban" data-share="douban" target="_blank" rel="nofollow noopener noreferrer" href="#" aria-label="分享到豆瓣">
                                            <i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-douban"></use></svg></i>
                                        </a>
                                        <a class="meta-item linkedin" data-share="linkedin" target="_blank" rel="nofollow noopener noreferrer" href="#" aria-label="分享到 LinkedIn">
                                            <i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-linkedin"></use></svg></i>
                                        </a>
                                        <a class="meta-item facebook" data-share="facebook" target="_blank" rel="nofollow noopener noreferrer" href="#" aria-label="分享到 Facebook">
                                            <i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-facebook"></use></svg></i>
                                        </a>
                                        <a class="meta-item twitter" data-share="twitter" target="_blank" rel="nofollow noopener noreferrer" href="#" aria-label="分享到 Twitter">
                                            <i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-twitter"></use></svg></i>
                                        </a>
                                    </div>
                                    <div class="info-item act">
                                        <!-- NOTE(review): icon-only link with no accessible name. Its purpose is not evident
                                             from this markup, so no label is invented here; add an aria-label once the
                                             behaviour bound to #j-reading is confirmed. -->
                                        <a href="javascript:;" id="j-reading"><i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-article"></use></svg></i></a>
                                    </div>
                                </div>
                            </div>
                        </div>
                    </div>
                                            <!-- Previous / next post navigation.
                                                 Each j-lazy wrapper starts with a placeholder background-image and carries a
                                                 second image URL in data-original (presumably swapped in by a lazy-load
                                                 script; confirm). data-original uses https so the image is not requested as
                                                 mixed content from this https page. Publication times are wrapped in <time>
                                                 with a machine-readable datetime (no timezone is shown in the markup, so the
                                                 value is a local date-time). -->
                                            <div class="entry-page">
                    <div class="entry-page-prev j-lazy" style="background-image: url('https://blog.ytso.com/wp-content/themes/justnews/themer/assets/images/lazy.png');" data-original="https://blog.ytso.com/zb_users/plugin/LazyLoad/usr/loading.gif">
                        <a href="https://blog.ytso.com/tech/bigdata/9385.html" title="神经网络分类算法 数据挖掘详解大数据" rel="prev">
                            <span>神经网络分类算法 数据挖掘详解大数据</span>
                        </a>
                        <div class="entry-page-info">
                            <span class="pull-left"><i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-arrow-left-double"></use></svg></i> 上一篇</span>
                            <span class="pull-right"><time datetime="2021-07-19T09:31">2021年7月19日 09:31</time></span>
                        </div>
                    </div>
                    <div class="entry-page-next j-lazy" style="background-image: url('https://blog.ytso.com/wp-content/themes/justnews/themer/assets/images/lazy.png');" data-original="https://blog.ytso.com/zb_users/plugin/LazyLoad/usr/loading.gif">
                        <a href="https://blog.ytso.com/tech/bigdata/9387.html" title="决策树分类详解大数据" rel="next">
                            <span>决策树分类详解大数据</span>
                        </a>
                        <div class="entry-page-info">
                            <span class="pull-right">下一篇 <i class="wpcom-icon wi"><svg aria-hidden="true"><use xlink:href="#wi-arrow-right-double"></use></svg></i></span>
                            <span class="pull-left"><time datetime="2021-07-19T09:31">2021年7月19日 09:31</time></span>
                        </div>
                    </div>
            </div>
                                        <!-- Leaked server error: a wp_die XML payload (status 500, "Error establishing a Redis
                                             connection") was emitted inline in the HTML body at this point.
                                             The hidden attribute keeps its element text ("wp_die", "500") from rendering to
                                             visitors while preserving the payload for diagnosis.
                                             NOTE(review): this only masks the symptom. Presumably the template part that
                                             should render here aborted; fix the Redis object-cache connection server-side and
                                             regenerate the page so this block is no longer emitted. -->
                                        <error hidden>
    <code>wp_die</code>
    <title><![CDATA[WordPress &amp;rsaquo; Error]]></title>
    <message><![CDATA[&lt;h1&gt;Error establishing a Redis connection&lt;/h1&gt;
&lt;p&gt;To disable Redis, delete the &lt;code&gt;object-cache.php&lt;/code&gt; file in the &lt;code&gt;/wp-content/&lt;/code&gt; directory.&lt;/p&gt;
]]></message>
    <data>
        <status>500</status>
    </data>
</error>
<!--
Performance optimized by Redis Object Cache. Learn more: https://wprediscache.com

Retrieved 1841 objects (282 KB) from Redis using Predis (v2.4.0).
-->
