Download as pdf or txt
Download as pdf or txt
You are on page 1 of 2

Operation R Python​ (pandas)

Class name data.frame data.table DataFrame

Load package/module # built-in library(data.table) import pandas as pd

Get help ?data.frame help(package=data.table) help(pd)


?`[.data.frame` ?data.table ?pd.DataFrame

Create table data.frame(a=c(0.3, 1.5, 7), data.table(i=c("x", "y", "z"), pd.DataFrame(data={"a": [0.3, 1.5, 7],
b=c(NA, 6, 2), a=c(0.3, 1.5, 7), "b": [None, 6, 2]},
row.names=c("x", "y", "z")) b=c(NA, 6, 2), key="i") index=["x", "y", "z"])

Save table to text file write.table(T, "table.txt", write.table(T, "table.txt", T.to_csv("table.txt",


col.names=TRUE, col.names=TRUE, header=True,
row.names=​TRUE​, row.names=FALSE, index=​True​,
sep="\t" sep="\t", sep="\t",
na=​"NA"​, na=​"NA"​, na_rep="NA",
quote=FALSE) quote=FALSE) quoting=​csv.QUOTE_MINIMAL​)

Read table from text file read.delim("table.txt", fread("table.txt", pd.read_table("table.txt",


Ex: a b c* header=​TRUE​, header=​"auto"​, header=​"infer"​,
i ------------- row.names=1, ​# use setkey(T, i) for indexing index_col=0,
x | 0.3 7 NA col.names=c(NA, NA, NULL), select=c("i", "a", "b"), usecols=["a", "b"],
y | 1.5 NA 6 na.strings=​"NA"​) na.strings=​"NA"​) na_values="NA")

Print first/last rows head(T); tail(T) head(T); tail(T) T.head(); T.tail()

Table quartiles summary(T) summary(T) T.describe()

Table information str(T) str(T); tables() T.info()

Table dimensions dim(T) dim(T) T.shape

Number of rows nrow(T) nrow(T) len(T)

Number of columns ncol(T) ncol(T) len(T.columns)

Index rownames(T) rownames(T) T.index


rownames(T) <- new_index setkey(T, new_index) ​# column name of T.index = new_index

Columns colnames(T) colnames(T) T.columns


colnames(T) <- new_columns setnames(T, new_columns) T.columns = new_columns
Operation R Python​ (pandas)
Class name data.frame data.table DataFrame

Get 1st row T[1, ] T[1, ]; T[1] T.iloc[0]; T.iloc[0, :]

Get row "x" T["x", ] T["x", ]; T["x"] T.loc["x"]; T.loc["x", :]

Get 1st column T[[1]]; T[1] T[[1]]; T[, 1, with=F] T.iloc[:, 0]

Get column "a" T$a; T[["a"]]; T["a"]; T[, "a"] T$a; T[["a"]]; T[, a]; T[, "a", with=F] T.a; T["a"]; T.loc[:, "a"]

Add column T$d <- new_column T[, d := new_column]; ​# see help(`:=`) T["d"] = new_column;
# or any of the above forms set(T, NULL, "d", new_column) T.insert(0, "d", new_column)

Remove column T[["d"]] <- NULL; #​ idem T[, d := NULL]; del T["d"];
T <- T[names(T) != "d"] set(T, NULL, "d", NULL) T.drop("d", axis=1, inplace=True)

Get subset (example) T[1:2, c("a", "c")] T[1:2, .(a, c)] ​# .(a, c) == list(a, c) T.ix[1:2, ["a", "c"]] ​# ix == iloc + loc

Reorder columns* T <- T[order(colnames(T))] setcolorder(T, new_order) T = T.reindex_axis(new_order, axis=1)

Sort* T <- T[order(T), ]? setorder(T, i) ​# setkey also sorts T.sort(0, inplace=True)

Apply func to rows/cols* lapply(T,func); apply(T, 2, func) T[, lapply(.SD, func), .SDcols=-1] T.apply()

Apply func elementwise* apply(T, 1:2, func) ??? T.applymap()

Mean column* T$sum <- rowMeans(T) T[, sum := rowMeans(.SD)] T["sum"] = T.mean(axis=1)

Add two columns* T$b + T$c T$b + T$c; T[, b + c] T.b + T.c

* Untested.

More about:
● R data.table differences from data.frame (data.table FAQ) - http://datatable.r-forge.r-project.org/datatable-faq.pdf
● pandas DataFrame​ comparison with data.frame - ​http://pandas.pydata.org/pandas-docs/stable/comparison_with_r.html
● http://graphlab.com/learn/translator/
● https://sites.google.com/site/gappy3000/home/pandas_r
● https://drive.google.com/folderview?id=0ByIrJAE4KMTtaGhRcXkxNHhmY2M&usp=sharing
● http://mathesaurus.sourceforge.net/matlab-python-xref.pdf

You might also like