逻辑回归实验

逻辑回归实验,包含简单的梯度下降(代码中以等价的梯度上升形式最大化对数似然)

Posted by 徐金杰 on November 1, 2018

逻辑回归代码

import random

from numpy import mat, array, arange
from numpy.ma import exp, ones, shape
import matplotlib.pyplot as plt

# Turn on matplotlib's interactive mode so the figure can be redrawn
# repeatedly during training instead of blocking on a single show() call.
plt.ion()

def getData():
    """Generate a random, labelled sample set around the line y = 2x + 2.

    One hundred x values are drawn as ``random.random() * 5``; for each of
    them one hundred y values are drawn as ``random.random() * 20``.  Every
    (x, y) pair contributes the feature row ``[x, y - 2 * x]`` and the label
    1 when ``2 * x + 2 > y``, otherwise 0.

    Returns:
        A ``(samples, labels)`` tuple of two parallel lists holding
        100 * 100 entries each.
    """
    def boundary(x):
        # Reference line that separates the two classes.
        return 2 * x + 2

    samples = []
    labels = []

    # All x values are drawn up front, then a fresh batch of y values is
    # drawn for each x.
    x_values = [random.random() * 5 for _ in range(100)]
    for x in x_values:
        y_values = [random.random() * 20 for _ in range(100)]
        for y in y_values:
            samples.append([x, y - 2 * x])
            labels.append(1 if boundary(x) > y else 0)

    return samples, labels


def sigmoid(inX):
    """Return the logistic function ``1 / (1 + e**(-inX))`` of ``inX``.

    ``inX`` may be a scalar or anything the imported ``exp`` accepts; the
    operation is applied element-wise.
    """
    denominator = 1 + exp(-inX)
    return 1.0 / denominator


def gradAscent(dataMatIn, classLabels, alpha=0.0001, maxCycles=100000,
               plotEvery=1000):
    """Fit logistic-regression weights with batch gradient ascent.

    Starting from an all-ones weight vector, every iteration evaluates
    ``h = sigmoid(X * w)`` and moves the weights by
    ``alpha * X.T * (labels - h)``.

    Args:
        dataMatIn: 2-D sample data, one row per sample (anything ``mat``
            accepts, e.g. a nested list or an ndarray).
        classLabels: Sequence of labels, one per sample row; converted to a
            column vector internally.
        alpha: Step size applied to each update.
        maxCycles: Number of iterations to run.
        plotEvery: Redraw the current fit every ``plotEvery`` iterations
            (including iteration 0).  ``0`` or ``None`` disables plotting.

    Returns:
        The fitted weights as an array of shape ``(n, 1)``, where ``n`` is
        the number of columns of ``dataMatIn``.
    """
    dataMatrix = mat(dataMatIn)  # convert the samples to a NumPy matrix
    # Convert the labels to a matrix, then transpose the row vector into a
    # column vector so it lines up with the per-sample predictions.
    labelMat = mat(classLabels).transpose()
    n = shape(dataMatrix)[1]
    weights = ones((n, 1))
    for k in range(maxCycles):
        h = sigmoid(dataMatrix * weights)
        error = labelMat - h

        # (n x m) times (m x 1): the per-sample errors are accumulated into
        # one adjustment per weight.
        weights = weights + alpha * dataMatrix.transpose() * error

        # Redraw only on every ``plotEvery``-th iteration.  The previous
        # test, ``if k % 1000:``, was inverted and redrew on every iteration
        # that was NOT a multiple of 1000.
        if plotEvery and k % plotEvery == 0:
            # Pass the matrix view so ``[i, 0]`` indexing in the plot helper
            # also works when the caller supplied a plain nested list.
            plotBestFit(dataMatrix, labelMat, weights)

    return array(weights)


def plotBestFit(dataArr, labelMat, weights):
    """Redraw the samples and the line described by ``weights``.

    The current figure is cleared, samples whose label converts to 1 are
    drawn as red squares and all remaining samples as green dots, and the
    line ``y = weights[0] * x + weights[1]`` is plotted for x from 0 up to
    (but excluding) 5.0 in steps of 0.1.  The function ends with a short
    ``plt.pause`` call.

    Args:
        dataArr: 2-D sample container indexable as ``dataArr[i, 0]`` /
            ``dataArr[i, 1]``.
        labelMat: Per-sample labels, indexable by row; each entry must be
            convertible with ``int``.
        weights: Container whose first two entries are used as slope and
            intercept of the plotted line.
    """
    plt.clf()

    sample_count = shape(dataArr)[0]
    rows = range(sample_count)
    is_positive = [int(labelMat[row]) == 1 for row in rows]

    pos_x = [dataArr[row, 0] for row in rows if is_positive[row]]
    pos_y = [dataArr[row, 1] for row in rows if is_positive[row]]
    neg_x = [dataArr[row, 0] for row in rows if not is_positive[row]]
    neg_y = [dataArr[row, 1] for row in rows if not is_positive[row]]

    plt.scatter(pos_x, pos_y, s=3, c='red', marker='s')
    plt.scatter(neg_x, neg_y, s=3, c='green')

    line_x = arange(0, 5.0, 0.1)
    line_y = weights[0] * line_x + weights[1]
    plt.plot(line_x, line_y)

    plt.xlabel('X')
    plt.ylabel('Y')
    # A short pause instead of a blocking show() call.
    plt.pause(0.033)

def testLR():
    """Run the experiment end to end and print the fitted weights.

    Generates the sample set with ``getData``, hands the samples (as an
    array) and the labels to ``gradAscent``, and prints the result.
    """
    samples, labels = getData()
    fitted = gradAscent(array(samples), labels)
    print(fitted)



# Run the experiment immediately when this script is executed.
testLR()

过拟合:训练到后期,拟合直线来回抖动。

正常拟合:但训练后期仍存在拟合(收敛)速度过慢的问题。