# Install Dask from conda (the `dask` package).
conda install dask
# Alternative: install only the `dask-core` package.
# NOTE(review): presumably the minimal-dependency variant of `dask` -- confirm against the Dask install docs.
conda install dask-core
# Install from source: clone the repository, enter the checkout, and
# pip-install it from the current directory.
git clone https://github.com/dask/dask.git
cd dask
python -m pip install .
# Arrays
import dask.array as da

x = da.random.uniform(low=0, high=10, size=(10000, 10000),  # normal numpy code
                      chunks=(1000, 1000))  # break into chunks of size 1000x1000

y = x + x.T - x.mean(axis=0)  # Use normal syntax for high level algorithms

# DataFrames
import dask.dataframe as dd

# parse_dates is passed as a list: pandas' read_csv accepts a bool, list or
# dict here and rejects a bare column-name string.
df = dd.read_csv('2018-*-*.csv', parse_dates=['timestamp'],  # normal Pandas code
                 blocksize=64000000)  # break text into 64MB chunks

s = df.groupby('name').balance.mean()  # Use normal syntax for high level algorithms

# Bags / lists
import json  # needed for json.loads below

import dask.bag as db

b = db.read_text('*.json').map(json.loads)  # one parsed JSON record per text line
total = (b.filter(lambda d: d['name'] == 'Alice')
          .map(lambda d: d['balance'])
          .sum())
def inc(x):
    """Return *x* plus one."""
    return x + 1


def double(x):
    """Return *x* multiplied by two."""
    return x * 2


def add(x, y):
    """Return the sum of *x* and *y*."""
    return x + y


data = [1, 2, 3, 4, 5]

# Plain sequential version: each element goes through inc and double, and the
# two results are added. The explicit loop is kept so it lines up step by step
# with the dask.delayed version of the same computation.
output = []
for x in data:
    a = inc(x)
    b = double(x)
    c = add(a, b)
    output.append(c)

total = sum(output)
# total == 50: each element contributes (x + 1) + (2 * x) = 3x + 1,
# i.e. 4 + 7 + 10 + 13 + 16.
import dask

# Same computation as the sequential loop, but every call is wrapped in
# dask.delayed so it is recorded as a task instead of being run immediately.
# Relies on `data`, `inc`, `double` and `add` from the sequential example.
output = []
for x in data:
    a = dask.delayed(inc)(x)
    b = dask.delayed(double)(x)
    c = dask.delayed(add)(a, b)
    output.append(c)

total = dask.delayed(sum)(output)  # still lazy: nothing has been computed yet

# Draw the task graph that the delayed calls built up.
# NOTE(review): rendering presumably needs the optional graphviz dependency -- confirm.
total.visualize()

# Execute the graph and return the concrete result.
total.compute()  # -> 50, i.e. the sum of (x + 1) + (2 * x) for x in 1..5
# Replace the scikit-learn imports with the same-named classes from dklearn.
# The scikit-learn lines are left commented out to show what is being swapped.
# from sklearn.grid_search import GridSearchCV
from dklearn.grid_search import GridSearchCV

# from sklearn.pipeline import Pipeline
from dklearn.pipeline import Pipeline
from sklearn.datasets import make_classification

# Synthetic two-class dataset; random_state is fixed so runs are repeatable.
X, y = make_classification(n_samples=10000,
                           n_features=500,
                           n_classes=2,
                           n_redundant=250,
                           random_state=42)

from sklearn import linear_model, decomposition
# from sklearn.pipeline import Pipeline  (shadowed by the dklearn import below)
from dklearn.pipeline import Pipeline

logistic = linear_model.LogisticRegression()
pca = decomposition.PCA()
pipe = Pipeline(steps=[('pca', pca),
                       ('logistic', logistic)])

# Pipeline step parameters are addressed as '<step name>__<parameter>'.
grid = dict(pca__n_components=[50, 100, 150, 250],
            logistic__C=[1e-4, 1.0, 10, 1e4],
            logistic__penalty=['l1', 'l2'])

# from sklearn.grid_search import GridSearchCV
from dklearn.grid_search import GridSearchCV

estimator = GridSearchCV(pipe, grid)
estimator.fit(X, y)
from dask.distributed import Client

# Connect to a running scheduler; 'scheduler-address:8786' is a placeholder
# to be replaced with the real host:port.
c = Client('scheduler-address:8786')
PS:在公众号内回复「Python」即可进入 Python 新手学习交流群,一起完成 100 天计划!
老规矩,兄弟们还记得么,右下角的“在看”点一下;如果感觉文章内容不错的话,记得分享到朋友圈,让更多的人知道!
【神秘礼包获取方式】
`单行代码`