Python代码:先准备训练样本的数据文件和标签文件 train_X4000.txt、train_y4000.txt,并把它们放在 tsne.py 所在的目录下。(t-SNE 的原始 Python 实现见 Laurens van der Maaten 的主页 http://lvdmaaten.github.io/tsne/ 中的 "Python implementation"。)

tsne.py 代码(为了让 figure 按类别显示数据的标签,对原始代码做了简单修改):

#!/usr/bin/env python

# -*- coding: utf-8 -*-

#

# tsne.py

#

# Implementation of t-SNE in Python. The implementation was tested on Python 2.5.1, and it requires a working

# installation of NumPy. The implementation comes with an example on the MNIST dataset. In order to plot the

# results of this example, a working installation of matplotlib is required.

# The example can be run by executing: ipython tsne.py -pylab

#

#

# Created by Laurens van der Maaten on 20-12-08.

# Copyright (c) 2008 Tilburg University. All rights reserved.

import numpy as Math

import pylab as Plot

def Hbeta(D = Math.array([]), beta = 1.0):
    """Compute the entropy and the normalised P-row for one Gaussian precision.

    Parameters:
        D: 1-D array-like of distances from one point to the others
           (x2p passes squared Euclidean distances).
        beta: precision of the Gaussian kernel; the unnormalised kernel
           value for each entry is exp(-D * beta).

    Returns:
        (H, P) where P is the kernel row divided by its sum and H is the
        Shannon entropy (in nats) of that row, up to the small epsilon
        added to the normaliser. x2p compares H against log(perplexity).
    """
    D = Math.asarray(D)

    # Unnormalised Gaussian kernel values.
    P = Math.exp(-D * beta)

    # The epsilon keeps the division below finite when every kernel value
    # underflows to zero (very large beta).
    sumP = Math.sum(P) + 1e-6

    H = Math.log(sumP) + beta * Math.sum(D * P) / sumP
    P = P / sumP
    return H, P

def x2p(X = Math.array([]), tol = 1e-5, perplexity = 30.0):
    """Binary-search a Gaussian precision per point to hit a target perplexity.

    Parameters:
        X: 2-D array with one row per data point.
        tol: the search for a point stops once |H - log(perplexity)| <= tol.
        perplexity: target perplexity shared by every conditional
            distribution.

    Returns:
        An (n, n) array whose row i holds the conditional probabilities
        returned by Hbeta for point i; the diagonal is left at zero.

    The search for each point gives up after 50 refinements, so a row may
    be returned without meeting the tolerance.
    """
    print("Computing pairwise distances...")
    n = X.shape[0]

    # Squared Euclidean distances via |a|^2 - 2 a.b + |b|^2.
    sum_X = Math.sum(Math.square(X), 1)
    D = Math.add(Math.add(-2 * Math.dot(X, X.T), sum_X).T, sum_X)

    P = Math.zeros((n, n))
    beta = Math.ones(n)
    logU = Math.log(perplexity)

    for i in range(n):
        if i % 500 == 0:
            # Spacing reproduces the original comma-separated print output.
            print("Computing P-values for point  %s  of  %s ..." % (i, n))

        # Every column except i: a point is not its own neighbour.
        others = Math.concatenate((Math.r_[0:i], Math.r_[i+1:n]))
        Di = D[i, others]

        # Search bracket for this point's precision; infinite means
        # "no bound found yet on that side".
        betamin = -Math.inf
        betamax = Math.inf
        b = beta[i]

        (H, thisP) = Hbeta(Di, b)
        Hdiff = H - logU
        tries = 0

        while Math.abs(Hdiff) > tol and tries < 50:
            if Hdiff > 0:
                # Entropy too high: raise the precision.
                betamin = b
                if Math.isinf(betamax):
                    b = b * 2
                else:
                    b = (b + betamax) / 2
            else:
                # Entropy too low: lower the precision.
                betamax = b
                if Math.isinf(betamin):
                    b = b / 2
                else:
                    b = (b + betamin) / 2

            (H, thisP) = Hbeta(Di, b)
            Hdiff = H - logU
            tries = tries + 1

        beta[i] = b
        P[i, others] = thisP

    print("Mean value of sigma:  %s" % Math.mean(Math.sqrt(1 / beta)))
    return P

def pca(X = Math.array([]), no_dims = 50):
    """Project the rows of X onto their leading principal components.

    Parameters:
        X: N x D array-like with one sample per row (assumed real-valued).
        no_dims: number of components to keep. If it exceeds D, all D
            components are returned.

    Returns:
        An N x min(no_dims, D) array of the mean-centred data projected
        onto the eigenvectors of X^T X with the largest eigenvalues.
        The sign of each column is arbitrary.
    """
    print("Preprocessing the data using PCA...")
    X = Math.asarray(X)

    # Centre every column; broadcasting replaces the explicit tile.
    X = X - Math.mean(X, 0)

    # X^T X is symmetric, so eigh applies and yields real output. Its
    # eigenvalues come back in ascending order, so sort them descending
    # to make the first columns the directions of greatest variance.
    # (The general eig routine guarantees no ordering at all.)
    (l, M) = Math.linalg.eigh(Math.dot(X.T, X))
    order = Math.argsort(l)[::-1]

    Y = Math.dot(X, M[:, order[:no_dims]])
    return Y

def tsne(X = Math.array([]), no_dims = 2, initial_dims = 50, perplexity = 30.0, max_iter = 1000):
    """Run t-SNE on the N x D array X and return an N x no_dims embedding.

    Usage: Y = tsne.tsne(X, no_dims, initial_dims, perplexity)

    Parameters:
        X: N x D NumPy array of dtype float64.
        no_dims: dimensionality of the returned embedding.
        initial_dims: X is first reduced to this many dimensions with pca().
        perplexity: target perplexity handed to x2p().
        max_iter: number of gradient-descent iterations.

    Returns:
        The N x no_dims embedding, or -1 (after printing an error) when X
        is not float64. The initial embedding is drawn from
        Math.random.randn, so results differ between runs unless the
        NumPy random state is seeded by the caller.
    """
    # Check inputs
    if X.dtype != "float64":
        print("Error: array X should have type float64.")
        return -1

    # Initialize variables
    X = pca(X, initial_dims).real
    n = X.shape[0]
    initial_momentum = 0.5
    final_momentum = 0.8
    eta = 500
    min_gain = 0.01
    Y = Math.random.randn(n, no_dims)
    iY = Math.zeros((n, no_dims))
    gains = Math.ones((n, no_dims))

    # Compute symmetrised, normalised P-values
    P = x2p(X, 1e-5, perplexity)
    P = P + Math.transpose(P)
    P = P / Math.sum(P)
    P = P * 4  # early exaggeration; undone after iteration index 100
    P = Math.maximum(P, 1e-12)

    # Run iterations
    for it in range(max_iter):

        # Pairwise affinities in the embedding: 1 / (1 + squared distance).
        sum_Y = Math.sum(Math.square(Y), 1)
        num = 1 / (1 + Math.add(Math.add(-2 * Math.dot(Y, Y.T), sum_Y).T, sum_Y))
        num[range(n), range(n)] = 0
        Q = num / Math.sum(num)
        Q = Math.maximum(Q, 1e-12)

        # Gradient. With W = (P - Q) * num, row i is
        #   sum_j W[j, i] * (Y[i] - Y[j])
        #     = Y[i] * sum_j W[j, i] - (W^T Y)[i],
        # which is the per-point loop written as one matrix expression.
        W = (P - Q) * num
        dY = Math.sum(W, 0)[:, None] * Y - Math.dot(W.T, Y)

        # Perform the update
        if it < 20:
            momentum = initial_momentum
        else:
            momentum = final_momentum

        # Grow the gain where the gradient sign differs from the sign of
        # the previous step, shrink it where they agree.
        same_sign = (dY > 0) == (iY > 0)
        gains = (gains + 0.2) * (~same_sign) + (gains * 0.8) * same_sign
        gains[gains < min_gain] = min_gain

        iY = momentum * iY - eta * (gains * dY)
        Y = Y + iY
        Y = Y - Math.mean(Y, 0)  # keep the embedding centred

        # Report the cost (computed with Q from before this update).
        if (it + 1) % 10 == 0:
            C = Math.sum(P * Math.log(P / Q))
            # Spacing reproduces the original comma-separated print output.
            print("Iteration  %s : error is  %s" % (it + 1, C))

        # Stop lying about P-values
        if it == 100:
            P = P / 4

    # Return solution
    return Y

if __name__ == "__main__":
    # Example driver: embed the samples in train_X4000.txt, save the
    # coordinates to Y.txt, and plot them coloured by the labels in
    # train_y4000.txt (classes 1..5).
    print("Run Y = tsne.tsne(X, no_dims, perplexity) to perform t-SNE on your dataset.")
    # NOTE(review): the message below says 2,500 MNIST digits, but the files
    # loaded are train_X4000.txt / train_y4000.txt -- confirm and reword.
    print("Running example on 2,500 MNIST digits...")

    X = Math.loadtxt("train_X4000.txt")
    labels = Math.loadtxt("train_y4000.txt")
    # For a quick smoke test, subsample here: X = X[:100]; labels = labels[:100]

    Y = tsne(X, 2, 38, 20.0)

    # One "x y" pair per line; the with-block closes the file even if a
    # write fails.
    with open('Y.txt', 'w') as fil:
        for row in Y:
            fil.write(str(row[0]) + ' ' + str(row[1]) + '\n')

    # One scatter series per class label so the legend shows each class.
    colors = ['b', 'c', 'y', 'm', 'r']
    for cls, color in enumerate(colors, 1):
        members = labels == cls
        Plot.scatter(Y[members, 0], Y[members, 1], 20, color=color, label=str(cls))

    Plot.legend()
    Plot.savefig('figure4000.pdf')
    Plot.show()

python用tsne降维_tsne降维可视化相关推荐

  1. Python使用tsne进行高维数据可视化实战:二维可视化、三维可视化

    Python使用tsne进行高维数据可视化实战:二维可视化.三维可视化 # 绘制二维可视化图像并添加标签字符函数 def plot_embedding(data, label, title):x_mi ...

  2. python用tsne降维_tSNE降维

    我有两套数据训练和测试.这两个数据集分别有30213和30235个项目,每个项目有66个维度.在 我正在尝试应用scikit learn的t-SNE将维数降到2.由于数据集很大,如果我试图一次性处理整 ...

  3. python使用TSNE为影像组学(radiomics)数据进行降维可视化分析

    python使用TSNE为影像组学(radiomics)数据进行降维可视化分析 目录 python使用TSNE为影像组学(radiomics)数据进行降维可视化分析

  4. python用tsne降维图像_python代码实现TSNE降维数据可视化教程

    TSNE降维jne免费资源网 降维就是用2维或3维表示多维数据(彼此具有相关性的多个特征数据)的技术,利用降维算法,可以显式地表现数据.(t-SNE)t分布随机邻域嵌入 是一种用于探索高维数据的非线性 ...

  5. TSNE高维数据降维可视化工具 + python实现

    文章目录 1.概述 1.1 什么是TSNE 1.2 TSNE原理 1.2.1入门的原理介绍 1.2.2进阶的原理介绍 1.2.2.1 高维距离表示 1.2.2.2 低维相似度表示 1.2.2.3 惩罚 ...

  6. python 降维 聚类_比PCA降维更高级——(R/Python)t-SNE聚类算法实践指南

    作者介绍:Saurabh.jaju2 Saurabh是一名数据科学家和软件工程师,熟练分析各种数据集和开发智能应用程序.他目前正在加州大学伯克利分校攻读信息和数据科学硕士学位,热衷于开发基于数据科学的 ...

  7. Python数据集可视化:抽取数据集的两个特征进行二维可视化、主成分分析PCA对数据集降维进行三维可视化(更好地理解维度之间的相互作用)

    Python数据集可视化:抽取数据集的两个特征进行二维可视化.主成分分析PCA对数据集降维进行三维可视化(更好地理解维度之间的相互作用) 目录 Python数据集可视化:抽取数据集的两个特征进行二维可 ...

  8. PCA图像数据降维及重构误差分析实战并使用TSNE进行异常数据可视化分析

    PCA图像数据降维及重构误差分析实战并使用TSNE进行异常数据可视化分析 目录 PCA图像数据降维及重构误差分析实战并使用TSNE进行异常数据可视化分析

  9. ML之DR:基于鸢尾花(Iris)数据集利用多种降维算法(PCA/TSVD/LDA/TSNE)实现数据降维并进行二维和三维动态可视化应用案例

    ML之DR:基于鸢尾花(Iris)数据集利用多种降维算法(PCA/TSVD/LDA/TSNE)实现数据降维并进行二维和三维动态可视化应用案例 目录 基于鸢尾花(Iris)数据集利用多种降维算法(PCA ...

最新文章

  1. Dispatch 执行ABC任务,执行完成之后刷新UI,指定任务D
  2. linux nice线程,linux nice 线程
  3. 网络开源框架之libevent使用实例
  4. Python语言学习之文件夹那些事:python和文件夹的使用方法之详细攻略
  5. 【bzoj3289】 Mato的文件管理
  6. 揭秘:下一代的VisualStudio将会怎样?
  7. P5952-[POI2018]水箱【最小生成树】
  8. C++模板剖析:函数模板、类模板解析
  9. 生成随机数 java
  10. Redis五大数据类型String、Hash、List、Set、ZSet
  11. 安装nagios中php安装报错 configure error xml2-config not foud
  12. 24. Django部署:项目部署
  13. spring boot 拦截器 或 Spring AOP 方式记录请求日志
  14. tp5使用mpdf生成pdf文件时,碰到division by zero问题解决记录
  15. 一篇走心的文章和一个不起眼的引流方法
  16. 【学习笔记】别怕,EXCELVBA其实很简单(第2版)
  17. java实现gdal栅格矢量化_gdal栅格矢量化 - osc_lfs4vsih的个人空间 - OSCHINA - 中文开源技术交流社区...
  18. android 如何开启相机LED闪光灯
  19. VS2017 安装 Microsoft Visual Studio Installer Project
  20. 钱多多软件制作第三天

热门文章

  1. Ping命令和网络端口
  2. View 的 onMeasure 方法
  3. dx12 龙书第十一章学习笔记 -- 模板
  4. python括号匹配o(1)复杂度_Python面试题:使用栈处理括号匹配问题
  5. centos7怎么查看ip地址
  6. 施耐德lxm23du使用说明书_施耐德还出钢笔?简评德系入门钢笔施耐德BASE
  7. iOS13-适配夜间模式/深色外观(Dark Mode)
  8. 关于 Jenkins 的 Unity 3D Plugin
  9. 浅谈图像生成模型 Diffusion Model 原理
  10. 天体赛练习集 简要题解 - L2