Download as xlsx, pdf, or txt
Download as xlsx, pdf, or txt
You are on page 1of 57

Type How to

IF
IF
IF
WHILE
FOR
TERNARY
LIST
LIST
LIST Add as is
LIST Unpack and add
LIST update
LIST iterate
LIST iterate
LIST iterate
LIST iterate

LIST remove
LIST Index
LIST sort
LIST flatten
LIST to string
LIST to string
LIST count
LIST reverse
LIST reverse
LIST reverse
LIST duplicates
LIST duplicates
LIST insert
LIST remove
LIST remove
LIST remove
LIST remove a range
LIST sort list of classes
LIST copy
LIST sort two keys
LIST sort

LIST sort - ENumerate

LIST sort - ZIP


LIST sort - except first
STRING Quotes
STRING whitespace
STRING whitespace
DICT comprehension

STRING search
STRING search any
STRING search all
STRING iterate
STRING replace

STRING remove
DICT create

DICT Add

DICT Add

DICT delete
DICT delete

DICT delete
DICT sort

DICT sort by value


DICT key check
DICT value check
DICT keys list
DICT values list
DICT Find Key by Value
DICT Create from List
DICT create from List of Tuples
FUNCT MIN()
FUNCT FILTER()

FUNCT FILTER()

FUNCT FILTER()

TUPLE UNPACK

FUNCTION _MAIN_
FUNCTION ITERate
FUNCTION FILTER()

FUNCTION ZIP()
DICT CReate from LIST

FUNCTION Enumerate

FUNCTION EVAL

FUNCTION RANGE

FUNCTION SUM

FUNCTION MAP

FUNCTION FILTER()
FUNCTION REDUCE()

FUNCTION ZIP()
DICT create from ZIP

FUNCTION Recursive

FUNCTION iterative
NOTES Iterator and Iterable
NOTES Iterator
FUNCTION LAMBDA

FUNCTION *ARGS and *KWARGS


FUNCTION UNPACK * and **
fstring Number & String Format
fstring Dict
fstring Alignment
fstring Debug
LIST Clear

LIST Index of max value


LIST Merge
LIST Contains
LIST remove
LIST duplicates
FUNCTION ABS()

DICT to LIST of Tuples


LIST Shuffle
LIST Intersection
LIST subtract
LIST Transpose
LIST Create SUBLIST
LIST combinations
LIST Flatten
LIST comprehension
LIST to string
DICT comprehension
DICT sort
DICT pprint
DICT max
STRING alphabet
STRING Concatenate
STRING ZeroFILL
FUNCTION TYPE
STRING Multiline
FUNCTION isinstance

FUNCTION Global
DATETIME add days

LIST All Equal

LIST All Unique

LIST Average

DICT Dict to List

LIST Non Unique

LIST Unique

LIST Has Duplicates

LIST Init 2D

MAP

LIST most frequent

DICT sort
LIST to dict
Syntax
"if x > 10 and x < 20 and x != 15:
print('x iselse
print('The greater than 10')"
block')

BREAK
while CONTINUE
x <= 100 PASS
and count < 10:
if x % 2 == 0:
for i in range(1, 11):
result = 'High' if x > 10 else 'Low'
listOfStrings1 = ['Hi'] * 20
listOfStrings2 = ['Hi' for i in range(20)]
wordList.append(["one", "use", "data"])
wordList.extend(["one", "use", "data"])
list_of_numbers[0:3] = [10, 10, 10]
for word in wordList:
print(wordList[i])
i += 1
print(wordList[i])
i += 1
[print(i) for i in wordList]

def remove_all_occurrences(list_obj, value):
    """Remove every element equal to ``value`` from ``list_obj`` in place.

    Args:
        list_obj: The list to modify.
        value: The value whose occurrences are removed.

    Returns:
        None. ``list_obj`` itself is mutated.
    """
    # list.remove() drops only the first match, so repeat until none is left.
    while value in list_obj:
        list_obj.remove(value)
index_pos_list = [ i for i in range(len(list_of_elems)) if list_of_elems[
listOfNum.sort(reverse=True)
flat_ls = [item for sublist in ls for item in sublist]
s
ls==''.join(ls)
[1, 2, 3, 4]
s = ''.join(str(i) for i in ls)
sample_list.count(x). Returns zero if not present
ls.reverse()
ls_reversed = list(reversed(ls))
ls_reversed = ls[::-1]
list2 = list(set(list1))
from collections import OrderedDict
list2 = list(OrderedDict.fromkeys(list1))
ls.insert(1, 'yellow')
ls1.remove('b')
sample_list.pop(i) -- removes and returns the element at index i; with no index, the last element is removed
sample_list.clear() or sample_list *= 0 (both empty the list in place; plain `sample_list * 0` only returns a new empty list)
del ls[1:3]
student_list = [s1, s2, s3]
student_list.sort(key=lambda s: s.name, reverse=True)
favCities = cities.copy()
lst = [(1,2), (3,2), (3,3), (1,0)]
lst.sort(key=lambda x: (x[1], x[0]))
returns None. Sorts in PLACE

lst = ['Alice', 'Bob', 'Ann', 'Frank']


en_lst = list(enumerate(lst))
en_lst.sort(key=lambda x: x[1])
print(en_lst)

a = [5, 7, 9]
b = ['Alice', 'Bob', 'Ann']
zipped = list(zip(a, b))
# [(5, 'Alice'), (7, 'Bob'), (9, 'Ann')]
zipped.sort(key=lambda
lst = [99, 3, 8, 1, 12]x: x[1])
lst_2 = lst[:1] + sorted(lst[1:])
Triple quotes
y.strip() for multiline
- remove strings '''Yes''' or """Yes"""
white space

Py' + 'thon' becomes 'Python'


top_earners = [ ➊(k, v) ➋for k, v in employees.items() if v >= 100000]

if "sample" in mainStr:
if "Hello" not in mainStr:
if "SAMple".lower() in mainStr.lower():
result = any(([True if subStr in mainStr else False for subStr in listOfs
result = all(([True if subStr in mainStr else False for subStr in listOfs
for elem in sampleStr[ : :-1]:
sample_string = "This is a sample string"
char_to_replace = {'s': 'X',
'a': 'Y',
'i': 'Z'}
# Iterate over all key-value pairs in dictionary
for key, value in char_to_replace.items():
# Replace key character with value character in string
sample_string = sample_string.replace(key, value)
OR

sample_string = sample_string.translate(str.maketrans(char_to_replace))

strObj = strObj[1 : : ] - First Char remove


strObj = strObj[:-1:] - Last Char Remove
Remove Index range:
if len(strObj) > stop :
strObj = strObj[0: start:] + strObj[stop + 1::]
student_age = {'Jack': 32, 'Ritika': 31, 'Mark' : 22, 'Mathew' : 27}

word_freq.update({ 'the' : 21,


'how': 81,
'from': 16} )

word_freq. update( { key: value


for key, value in zip(list_of_keys, list_of_values)}
dict1.update( dict2 )

for key, value in word_freq.items():


if value == 23:
del word_freq[key] -- Will give error
for key, value in dict(word_freq).items():
if value == 23:
del word_freq[key] -- Create a copy and delete
word_freq = { key: value
for key, value in word_freq.items()
if value % 2 != 0}

for key in dict(word_freq):


if word_freq[key] % 2 == 0:
word_freq.pop(key)
[ print(key , " :: " , value) for (key, value) in sorted(wordsFreqDict.it

[ print(key , " :: " , value) for (key, value) in sorted(wordsFreqDict.it


listofTuples = sorted(wordsFreqDict.items()
if word_freq.get(key) is not None: , reverse=True, key=lambda x:
if word_freq.get(key, -1) != -1:
if val == value:
if any([True
dictkeys for(wordFreqDic.keys())
= list k,v in word_freq.items() if v == value]):
dictkeys = [x for x in wordFreqDic if x.startswith('t')]
dictValues = list (wordFreqDic.values())
listOfKeys = [key for (key,
zipbObj = zip(listOfStr, value) in dictOfWords.items() if value == 43
listOfInt)
dictOfWords = dict(zipbObj)
studentsDict = dict(listofTuples) key=lambda x: x[1])
minValue = min(sampleDict.items(),
minValue = min(listOfNum)
filteredArray = list(filter(lambda x : x not in array2, array1))
print(list(filter(lambda x: x[0].lower() in 'aeiou', creature_names)))

def filter_vowels(letter):
    """Return True if ``letter`` is one of the lowercase vowels a, e, i, o, u.

    Intended as a predicate for ``filter()``; uppercase vowels do not match.
    """
    # A tuple (not the string 'aeiou') keeps the test an exact-element match,
    # so multi-character strings such as 'ae' are still rejected.
    return letter in ('a', 'e', 'i', 'o', 'u')
filtered_vowels = filter(filter_vowels, letters)
aquarium_tanks = [11, False, 18, 21, "", 12, 34, 0, [], {}]
list(filter(None, aquarium_tanks))

r, g, *other = (192, 210, 100, 0.5) . Other = [100, 0.5]


x, y, *z, *t = (10, 20, 30, '10:30') - Error two *
numbers = (*odd_numbers, *even_numbers)
Python assigns '__main__' to the __name__ variable when you run the
script directly, and the module's name when you import the script as a module
colors = ['red', 'green', 'blue']
iterator = iter(colors)
for color in iterator:
print(color)
filter(function or None, iterable) --> filter object

zip(iter1 [,iter2 [...]]) --> zip object


for i, j, k, l in zip([1, 2, 3], "foo", ("one", "two", "three"), {"alpha"
d = dict(zip(keys, values))

enumerate(iterable[, start=0]) -> iterator for index, value of iterable


for index, value in enumerate([110, 45, 12, 891, "one"]):
or index, value in enumerate({'name': 'Jane', 'age': 26, 'salary': 40000}
for index, value in enumerate("hello", start=2):
print( list(enumerate("hello", start=2)) )
eval(expr, globals=None, locals=None)
eval("8 + 4 - 2 * 3")

range([start,] stop [, step]) -> range object


list(range(5, 10))
list(range(-100, -95))
list(range(1, 20, 3))

sum(iterable, [start]) -> number


sum({1: "one", 2: "two", 3: "three"})
sum([1, 2, 3, 4, 5])
sum([10, 20, 30], 100)

F = list(map(lambda x: (float(9)/5)*x + 32, C))


a = [1, 2, 3, 4]
b = [17, 12, 11, 10]
c = [-1, -4, 5, 9]
list(map(lambda x, y, z : x+y+z, a, b, c))
fibonacci = [0,1,1,2,3,5,8,13,21,34,55]
odd_numbers = list(filter(lambda x: x % 2, fibonacci))
import functools
functools.reduce(lambda x,y: x+y, [47,11,42,13])

f = lambda a,b: a if (a > b) else b


reduce(f, [47,11,42,102,13])
reduce(lambda x, y: x+y, range(1,101))

cities_and_population = [("Zurich", 415367),


("Geneva", 201818),
("Basel", 177654),
("Lausanne", 139111),
("Bern", 133883),
("Winterthur", 111851)]
cities, populations = list(zip(*cities_and_population))
text2morse = dict(zip(l1, l2))

def factorial(n):
    """Return ``n!`` computed recursively.

    Args:
        n: A non-negative integer.

    Raises:
        ValueError: If ``n`` is negative (the recursion would never reach 0).
    """
    if n < 0:
        raise ValueError("factorial() not defined for negative values")
    if n == 0:
        return 1
    return n * factorial(n - 1)

def iterative_factorial(n):
    """Return ``n!`` computed with a loop instead of recursion.

    Args:
        n: A non-negative integer.

    Raises:
        ValueError: If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("factorial() not defined for negative values")
    result = 1
    # Starting at 2 skips the no-op multiplication by 1; for n < 2 the
    # range is empty and the result stays 1.
    for i in range(2, n + 1):
        result *= i
    return result
Every iterator is also an iterable, but not every iterable is an iterator
An iterator can be created from an iterable by using the function 'iter'.
To make this possible, the class of the object needs either a method '__iter__', which returns an iterator, or a '__getitem__' method with sequential indexes starting at 0.
def iterable(obj):
    """Return True if ``iter(obj)`` succeeds, False if it raises TypeError."""
    try:
        iter(obj)
    except TypeError:
        return False
    return True


# Demo: report, for a handful of sample objects, whether each is iterable.
for element in [34, [4, 5], (4, 5), {"a": 4}, "dfsdf", 4.5]:
    print(element, "iterable: ", iterable(element))
Lazy Evaluation
converter = lambda x : x*2 if x < 10 else (x*3 if x < 20 else x)

def publishStudentDetails2(startMsg, endMsg, *args , collegeName="XYZ" ,


publishStudentDetails2("START", "END" , ("ss", "sdd",) , name="Jack", pho

def mySum(*args):
    """Return the sum of all positional arguments (0 when called with none)."""
    return sum(args)
def fun(**kwargs):
    """Print each keyword argument as ``name = value``, one per line."""
    # .items() yields key and value together, avoiding a second lookup.
    for key, value in kwargs.items():
        print("%s = %s" % (key, value))
# Driver code
fun(name="geeks", ID="101", language="Python")
first, *_, last = [1, 2, 3, 5, 7]
values = (1, 2, 3, 4, 5)
def add_numbers(*args):
    """Print and return the sum of all positional arguments.

    Returns:
        The total of ``args`` (0 when called with no arguments).
    """
    total = sum(args)
    # NOTE(review): the extracted text lost its indentation, so it is unclear
    # whether the print originally ran once or once per loop step -- confirm.
    print(total)
    return total

add_numbers(*values)

def myFun(arg1, arg2, arg3):
    """Print the three arguments, each labelled with its parameter name.

    Handy for demonstrating ``myFun(**kwargs)``: the dict keys must match
    the parameter names ``arg1``, ``arg2`` and ``arg3``.
    """
    print("arg1:", arg1)
    print("arg2:", arg2)
    print("arg3:", arg3)
# Using **kwargs to pass arguments to this function :
kwargs = {"arg1" : "InterviewBit", "arg2" : "Blog", "arg3" : "Packing and
myFun(**kwargs)
x = 4.5
print(f'This will print out the variable x: {x:.3f}')
print(f'{name:10}')

print(f'Number\t\tSquare\t\t\tCube')
for x in range(1, 11):
x = float(x)
print(f'{x:5.2f}\t\t{x*x:6.2f}\t\t{x*x*x:8.2f}')

print(f'{"My Grocery List":^30s}')


print(f'{"="*30}')
print(f'{strApples}\t{numApples:10d}\t\t${prcApples:>5.2f}')
fstring = f'{person1.get("name")} is {person1.get("age")} and is {p
for number in numbers:
print(f'The number is {number:+}')
fstring = f'Her name is {name} and {"she" if gender == "female" els

print(f'{"apple" : <30}')
print(f'{"apple" : ^30}')
print(f'{number
items[:] = [] = }')
items.clear()

list3 = list1 + list2


list3 = [*list1,
max_value *list2]
= max(a_list)
list3 = [item
max_index for sublist in zip(list1, list2) for item in sublist]
= a_list.index(max_value)
values.remove(1)
list3 = list(set(list1- Returns ValueError
+ list2)) -- Combine and remove Dups
indices
while = [index
'datagy' for
in index,
values: item in
[list1.append(item) for item in list2] enumerate(a_list) if item == max(a_li
if values.remove('datagy')
'oranges'
list3 not in items:
= list(set(list1).intersection(set(list2))) - Common
an_array = np.array(a_list)
values
if = =np.argmax(an_array)
[value for value>in
items.count('datagy')
index 0:values if value != 'datagy']
values.pop(0) - IndexError
any(item=='datagy' for item in items)
del values[1:3]-- Index Error
values = [item for item in values if str(item).isdigit()]
deduplicated_list = list()
[deduplicated_list.append(item) for item in duplicated_list if item not i
dictionary = dict.fromkeys(duplicated_list)
deduplicated_list = list(dictionary)
deduplicated_list = list(set(duplicated_list))

from collections import OrderedDict


duplicated_list = [1,1,2,1,3,4,1,2,3,4]
deduplicated_list = list(OrderedDict.fromkeys(duplicated_list))

duplicated_list = [1,1,2,1,3,4,1,2,3,4]
deduplicated_list = np.unique(np.array(duplicated_list)).tolist()
abs_array = np.abs(sample_array)
duplicated_list = [1,1,2,1,3,4,1,2,3,4]
df['Distance Absolute'] = df['Distance'].abs()
deduplicated_list = pd.Series(duplicated_list).unique().tolist()

list_of_tuples = list(sample_dict.items())
list_of_tuples = [(key, value) for key, value in sample_dict.items()]
list_of_tuples = list(zip(keys, values))
list_of_tuples
intersection == [item
list()for item in list1 if item in list2]
random.shuffle(a_list)
for key in sample_dict:
intersection
shuffled = list(set(list1).intersection(set(list2)))
= random.sample(a_list,
list_of_tuples.append((key, len(a_list))
sample_dict.get('key')))
intersection = list(set(list1) & set(list2))
intersection = list(np.intersect1d(list1, list2))
subtracted = [element1 - element2 for (element1, element2) in zip(l
array = np.array(list_of_lists)
transposed_array = array.T
transposed_list_of_lists = transposed_array.tolist()
from itertools import combinations
for i in range(len(list_of_lists[0])):
sample_list = ['a', 'b', 'c']
row = list()
list_combinations = list()
for sublist in list_of_lists:
for n inrow.append(sublist[i])
range(len(sample_list) + 1):
transposed.append(row)
flat_list = [item for sublist
list_combinations in list_of_lists for item in
+= list(combinations(sample_list, n))sublist]
new_ages = {key:value for (key, value) in ages.items() if value > 2
transposed
oddeven = [[row[i] for
= {key:('odd' ifrow in list_of_lists]
value % 2 == 1 elsefor i in range(len(list_o
'even') for (key, valu
from
for n itertools
in import chain
range(len(sample_list)
print(transposed) + 1):
names
new_list = ['Harry',
==[i 'Hermione', 'Ron',
101) 'Neville', 'Luna']
flat_list =for i in+=range(1,
list(chain(*list_of_lists))
list_combinations
chunked_list if ifor
% 2i==in0range(0,
if i % 5len(our_
list(combinations_with_replacement(sample_
[our_list[i:i+chunk_size] == 0]
index
flat_list = {k:v= [ifor for (k,
j v)
in in enumerate(names)}
nested_list for i in j]
smalldict
common_items = [i for i in list1 if ('a',
= {k: bigdict[k] for k in 'b', 'c')}
i in list2]
ages = {key:ages[key] for key in ages.keys() - {'marcus', 'nik'}}
import
joined pprint
dictionary = ' = {value:key for (key:value)
'.join(our_list) -- Error if in dictionary}
different data types
import
sample_dictstring = {'name': 'Nik', 'age': 31, 'gender': 'male',
alphabet
'books':
sorted_dict = list(string.ascii_lowercase)
['Harry Potter',for
= {key:value 'Lord
key, ofvalue
the Rings']}
in sorted(sales.items(), ke
new_word = word
pprint.pprint(sample_dict) + str(integer)
new_word
for i in =
max_keys = f'{word}{integer}'
range(97,
[key for123): key, value in ages.items() if value == max(ages
str(number).zfill(7)
new_word = '{}{}'.format(word, integer)
alphabet.append(chr(i))
a_string.rjust(10, ' ')
message = ( ... f"Hello, my name is {name}. "
f"I
type(x)can calculate
is not intpi to two places: {pi:4.3}. "
f"But I would rather be eating {food}.
isinstance([1,2,3,4], list)

t = 1

def increment():
    """Add one to the module-level variable ``t`` and print the new value."""
    global t  # rebind the module-level t instead of creating a local one
    t += 1
    print(t)  # displays 2 on the first call when t started at 1

increment()
from datetime import datetime, timedelta

def add_days(n, d=None):
    """Return the date ``n`` days after ``d``.

    Args:
        n: Number of days to add (may be negative).
        d: A ``date`` or ``datetime`` to start from. Defaults to the current
            local date and time at the moment of the call.

    Returns:
        An object of the same type as ``d``, shifted by ``n`` days.
    """
    # Resolve the default here: a ``d=datetime.today()`` default would be
    # evaluated only once, when the function is defined, and then go stale.
    if d is None:
        d = datetime.today()
    return d + timedelta(n)


add_days(5, datetime(2020, 10, 25))  # datetime(2020, 10, 30, 0, 0)
def all_equal(lst):
    """Return True if every element of ``lst`` is equal to the others.

    Elements must be hashable. An empty ``lst`` yields False, because it
    contains zero distinct values rather than exactly one.
    """
    return len(set(lst)) == 1
def all_unique(lst):
    """Return True if no value occurs more than once in ``lst``.

    Elements must be hashable. An empty ``lst`` yields True.
    """
    return len(lst) == len(set(lst))
def average(*args):
    """Return the arithmetic mean of the positional arguments as a float.

    Raises:
        ZeroDivisionError: If called with no arguments.
    """
    # The 0.0 start value forces float arithmetic even for all-int input.
    return sum(args, 0.0) / len(args)
def dict_to_list(d):
    """Return the items of ``d`` as a list of ``(key, value)`` tuples."""
    return list(d.items())

from collections import Counter


def filter_non_unique(lst):
    """Filter OUT the non-unique values of ``lst``.

    Returns:
        A list of the values that occur exactly once, in first-seen order.
    """
    return [item for item, count in Counter(lst).items() if count == 1]

from collections import Counter

def filter_unique(lst):
    """Filter OUT the unique values of ``lst``.

    Returns:
        A list with one entry for each value that occurs more than once,
        in first-seen order.
    """
    return [item for item, count in Counter(lst).items() if count > 1]
def has_duplicates(lst):
    """Return True if any value occurs more than once in ``lst``.

    Elements must be hashable. An empty ``lst`` yields False.
    """
    return len(lst) != len(set(lst))
def initialize_2d_list(w, h, val=None):
    """Return a list of ``h`` rows, each a list of ``w`` copies of ``val``.

    Every row is a distinct list object, so changing one row does not
    affect the others. ``val`` itself is not copied: each cell refers to
    the same object.
    """
    return [[val] * w for _ in range(h)]
def map_dictionary(itr, fn):
    """Build a dict mapping each element of ``itr`` to ``fn(element)``.

    Args:
        itr: Any iterable of hashable values, including one-shot iterators
            and generators.
        fn: Function applied once to each element to produce its value.

    Returns:
        A dict ``{element: fn(element)}``; for repeated elements the last
        result wins.
    """
    # A single pass: walking ``itr`` twice (once for keys, once for values)
    # would mispair the elements of a one-shot iterator.
    return {item: fn(item) for item in itr}
map_dictionary([1, 2, 3], lambda x: x * x) # { 1: 1, 2: 4, 3: 9 }
def most_frequent(lst):
    """Return the value that occurs most often in ``lst``.

    When several values share the highest count, the one seen first in
    ``lst`` is returned.

    Raises:
        ValueError: If ``lst`` is empty.
    """
    # Count everything in one pass instead of calling lst.count() per value.
    counts = Counter(lst)
    return max(counts, key=counts.get)

def sort_dict_by_key(d, reverse=False):
    """Return a new dict with the items of ``d`` ordered by key.

    Args:
        d: The dictionary to sort; it is not modified.
        reverse: When True, order the keys descending instead of ascending.
    """
    return dict(sorted(d.items(), key=lambda item: item[0], reverse=reverse))
def sort_dict_by_value(d, reverse=False):
    """Return a new dict with the items of ``d`` ordered by value.

    Args:
        d: The dictionary to sort; it is not modified.
        reverse: When True, order the values descending instead of ascending.
    """
    return dict(sorted(d.items(), key=lambda item: item[1], reverse=reverse))
def to_dictionary(keys, values):
    """Pair ``keys`` with ``values`` positionally and return them as a dict.

    If the two inputs differ in length, the extra items of the longer one
    are ignored.
    """
    return dict(zip(keys, values))
Notes

Append is faster

clear all elements


Dictionary changed size during iteration in python
No change in size error
Operator Overlo
Inheritance
Overriding
Encapsulation
print(c3) # Became possible because we have added __str__ method
print(c.getDescription())
print(c.getName()) # car has no method getName() but it is accessible through class Vehicle
c = B()
c.m1()
print(b1.checkbalance())
print(b1.__balance) -- Error
Type How to

LOGGING
loGGING
Syntax
finally:
print('in finally block')
finally:
EmailOrderStatusToUser(id) # Either ways send order email to user
# handle all other exceptions
pass
print('Unexpected Error!')
exit()
raise # Just raise
# raise UserDoesNotExist # Don't do this or you'll lose the stack trace

print 'FILE:'
print body
for filename in logfiles:
print filename
Type How to

Array Create
Array Reshape
Array Dimension
Array Slicing
Array Boolean Indexin
Array multiply
Array Flatten
Array Transpose
Array Concatenate
Array shape
Array append
Array unique
Array String
Array Round
Array Arith
Array Mean
Array Average
Array max,min
Array Where
Array Extract
Array view
Array Random
Array Replace
Array Select
Array Remove Nan
y
x == x[[0,1,2],
np.ones(5) [0,1,0]] -- [1 4 5]
x
a == np.ones([2,2], dtype=int)
np.array([[1,2,3],[4,5,6]])
x
a =
= np.array([[ 0, 1, 2],[ 3, 4, 5],[ 6, 7, 8],[ 9, 10, 11]])
np.asarray(lst)
b = a.reshape(3,2)
rows = np.array([[0,0],[3,3]])
x = np.arange(5, dtype=float)
cols = np.array([[0,2],[0,2]])
x = np.arange(10,20,2)
a=np.array([[1,2,3],[4,5,6]])
Syntax
y
x == x[rows,cols]
np.linspace(10,20,5)
a.shape=(3,2)
https://github.com/rougier/numpy-100/blob/master/100_Numpy_exercises_with_solutions.md
-- Corner elements
np.linspace(10,20,
x = np.array([[ 5, endpoint=False)
0, 1, 2],[ 3, 4, 5],[ 6, 7, 8],[ 9, 10, 11]])
x[x>5]
a = np.arange(24)
x = np.array([[ 0, 1, 2],[ 3, 4, 5],[ 6, 7, 8],[ 9, 10, 11]])
b = a.reshape(2,4,3)
z
a == x[1:4,1:3]
np.array([np.nan, 1,2,np.nan,3,4,5])
ndarray.ndim
y = x[1:4,[1,2]] -- Indexing
a[~np.isnan(a)]
a = np.array([1,2,3,4])
b = np.array([10,20,30,40])
c
a == np.arange(8).reshape(2,4)
a*b
a=np.array([[1,2],[3,4]])
np.char.center(arr, width,fillchar)
a.flatten()
a ==np.arange(12).reshape(3,4)
b np.array([[5,6],[7,8]])
np.char.center('hello', 20,fillchar='*')
np.transpose(a) or a.T
np.concatenate((a,b),axis=1)
a = np.array([[1,2,3],[4,5,6]])
np.concatenate((a,b),axis=0)
np.char.split ('TutorialsPoint,Hyderabad,Telangana', sep=',')
b ==np.resize(a,
a (3,2))
np.array([[1,2,3],[4,5,6]])
np.char.join(':','dmy')
b.shape
np.append(a, [[7,8,9]],axis=0)
u,indices=np.unique(a,
np.char.replace ('He isreturn_index=True)
a good boy', 'is', 'was')
np.append(a, [[7,8,9]],axis=1)
u,indices=np.unique(a,return_counts=True)
np.around(a, decimals=1)
np.floor(a)
np.add(a,b)
np.ceil(a)
np.multiply(a,b)
a = np.array([[1,2,3],[3,4,5],[4,5,6]])
np.divide(a,b)
np.mean(a)
np.subtract(a,b)
np.mean(a, axis=0)
np.average(a)
maxindex= np.argmax(a)
np.mean(a, axis=1) 4],weights=[4,3,2,1], returned=True)
np.average([1,2,3,
a.flatten()[maxindex]
x = np.arange(9.).reshape(3, 3)
np.average(a, axis=1, weights=wt)
y=np.where(x>3)
maxindex=np.argmax(a,
x = random.rand() -- Float axis=1)
from 0 to 1
x=random.randint(100, size=(5))
condition = np.mod(x,2)==0
x = random.randint(100, size=(3, 5))
col
b = = 'consumption_energy'
np.extract(condition,
a.view() -- x) Orig will not be changed
x = random.choice([3, 5, 7, 9])
conditions
Slice creates a view 5, 7, 9],(df2[col]
= [df2[col] >= 400, < 400) & (df2[col] > 200), df2[col] <= 200]
x
b =
= random.choice([3,
np.where(a > 0,'medium',
a, 0) size=(3, 5))
choices = ["high", 'low']
np.where((a > 2) & (a < 6) | (a == 7), -1, 100)
df2["energy_class"] = np.select(conditions, choices,
default = np.nan)
x[~np.isnan(x)]
~np.isnan(x).any(axis=1)
th_solutions.md
#int8, int16, int32, int64 can be replaced by equivalent string 'i1',
'i2','i4', etc

[col] <= 200]


Type How to
DATAFRAME Sample Data
DATAFRAME Slicing
DATAFRAME Create from List
DATAFRAME Create from DICT
DATAFRAME Properties
DATAFRAME Filter columns
DATAFRAME Filter columns
DATAFRAME Filter columns
DATAFRAME Filter columns
DATAFRAME Rename columns
DATAFRAME Remove Rows
DATAFRAME Drop columns
DATAFRAME Iterate Rows
DATAFRAME nan with blanks
DATAFRAME Drop Nan
DATAFRAME Add column
DATAFRAME Check Nan
DATAFRAME Select Rows
DATAFRAME Select Columns
DATAFRAME Filter rows
DATAFRAME Delete Rows
DATAFRAME Casting
DATAFRAME Row Count
DATAFRAME Apply
DATAFRAME Rearrange Colum
DATAFRAME GroupBy
DATAFRAME GroupBy
DATAFRAME Merge
DATAFRAME Merge
DATAFRAME conditional select
DATAFRAME map
DATAFRAME dates
DATAFRAME Binning
DATAFRAME Examples
DATAFRAME Calendar
DATAFRAME summary func
DATAFRAME aggregate func
DATAFRAME Reverse Rows
DATAFRAME Nlargest
DATAFRAME Format
# Filter Rows
df.index usingall
- Returns DataFrame.query()
row labels as series
df2=df.query("Courses
df.index.values - Returns == 'Spark'")
all row labels as list
df = pd.DataFrame(names, columns=['Name'])
df.dtypes - Returns datatypes of columns
dictionary
#Using = {- mean,std, etc.,
variable
df.describe()
zipped = list(zip(names,
'Name':
value='Spark' ['Katie', 'Nik', ages, locations))
'James', 'Evan'],
movies_df['genre'].describe()
df =
Syntax pd.DataFrame(zipped,
'Age': [32,
df2=df.query("Courses 32, 36, 31],columns=['Name',
== @value") 'Age', 'Location'])
movies_df['genre'].value_counts().head(10)
df.loc[rowselect,colselect] - Integer list[0,1,2] ,slice[1:3],single value [1]
df ='Location':
movies_df.isnull().sum() ['London', 'Toronto', 'Atlanta', 'Madrid']
pd.read_csv('https://raw.githubusercontent.com/datagy/data/main/sales.csv')pd.read_csv('h
df.iloc[rowselect,colselect] - Rows - Label list'Location'])
['a','b'] , Label 'a', Label Slice r1:r3
df =}
#inpace pd.DataFrame(data, columns=['Name', 'Age',
df['region'].unique() - Cols - Colname List ['c1','c2'] colname 'c1' slice - c1:
df.loc[df['Courses']
df = pd.DataFrame(dictionary
df.query("Courses == == value]
'Spark'",inplace=True)
df[['a','b']].drop_duplicates()
df.query("Courses == - Unique Values in two columns
'Spark'",inplace=True)
df=pd.DataFrame(listoflists,columns=column_nameslist,index=row_labellist)
df.loc[df['Courses']
df2=df[4:] # Returns!= 'Spark']
rows from 4th row
df['date'].value_counts()
df.query("Courses != 'Spark'")- Unique values with counts
df.loc[df['Courses'].isin(values)]
df2=df[1:-1]
df[df["Courses"]
#Not equals, in # & Removes
pd.DataFrame(np.random.rand(4,
== first
'Spark']
multiple and
8), last
conditions row
columns=list('abcdefgh'))
df.query("Courses in ('Spark','PySpark')")
df.loc[~df['Courses'].isin(values)]
df2=df[2:4] # Return rows between 2 and 4
df[df['Courses'].str.contains("Spark")]
df.query("Courses
Iterate all rows!=
pd.show_versions()
# 'Spark'")
using DataFrame.iterrows()
df.query("`Courses
df.loc[(df['Discount'] Fee` >=
>= 23000&and
1000) `Courses Fee`<=
(df['Discount'] <=2000)]
24000")
df[df['Courses'].str.lower().str.contains("spark")]
df.query("Courses
for index, row in in ('Spark','PySpark')")
df.iterrows():
df.loc[(df['Discount']
df.drop(labels=None, >=
df[df['Courses'].str.startswith("P")]
df.query("`Courses Fee` >=1200)
axis=0, & (df['Fee']
index=elem
23000") >= 23000
or list, )]
columns=elem or list, inplace=False)
print (index,row["Fee"], row["Courses"])
df1 = df.drop(index=['r1','r2'])
df.drop(["Courses", "Fee"], axis = 1, inplace=True)
df.query("`Courses Fee` >= 23000 and `Courses Fee` <= 24000") -- Drop multiple columns
df=df.drop(df.index[-1])
drinks.select_dtypes(include=['number',
df.drop(df.columns[[1,2]], - Delete last row
'object',
axis = 1, inplace=True) 'category', 'datetime']).head()
#
df2Iterate
= all rows using DataFrame.itertuples()
df1 = df.rename({'a':
df.drop(index=['r1','r2'])
df2=df.drop(df.loc[:,
'A', 'b': 'B'},- Delete
'Courses':'Fee'].columns,
axis=1)
rows by label
# Other ways
df.apply(lambda to
f.rename(columns={'a':
Filter
row:
for row in df.itertuples(index Rows True):inplace =axis
row[df['Courses'].isin(['Spark','PySpark'])])
'A', 'b': = 'B'}, True)
= 1) - Drop columns between two columns
df1=df.drop(df.index[[1,3]])
df.loc[df['Courses'] == value] - Delete rows by position
print (getattr(row,'Index'),getattr(row, "Fee"), getattr(row, "Courses"))
df2=df.dropna()
for col in df.columns:
df.loc[df['Courses'] != 'Spark']
df2 = df.replace(np.nan,
df2=df.dropna(axis=0) '', regex=True)
or axis = 1
if 'Fee' in col:
df.loc[df['Courses'].isin(values)]
# Using DataFrame.index
df2 = df[['Courses','Fee'
df2=df.dropna().reset_index(drop=True) ]] = df[['Courses','Fee' ]].fillna('')
for idx del df[col]
df.loc[~df['Courses'].isin(values)]
in df.index:
df.fillna('',
df2 = inplace=True)
df.loc[['r2','r3','r6']]
df2=df.dropna(how='all') #inSelect
row Rows by<= Index Label List
df.loc[(df['Discount']
print(df['Fee'][idx], >=-1000)
all Nan & (df['Discount']
df['Courses'][idx]) 2000)]
df2 = df['Discount']=df['Discount'].fillna(0)
df.loc['r1':'r5']
df2=df.dropna(subset=['Courses','Fee']) # Select Rows by
- Drop Label Index Range
df.loc[(df['Discount']
value = >= 1200) &-(df['Fee']
df.isnull().values.any() on entire >=rows
df
with
23000 )] Nan in select column
df2 = df.iloc[2]
df.dropna(inplace=True) # Select Row by Index
value
df2 = = df[['Fee','Duration']].isnull().values.any()
df.iloc[[2,3,6]] # Select Rows by Index List
df2=df.dropna(axis=1,how='all')
df2 = df.assign(Blank_Column=" ", NaN_Column
df[df["Courses"]
result
df2 = df.iloc[1:5]
== 'Spark']
= df.isnull().sum() # Select- count
Rows by entire=Index
NanInteger dfnp.nan, None_Column=None)
Range
df2 = df[["Courses","Fee","Duration"]]
df.insert(0,"Blank_Column",
df[df['Courses'].str.contains("Spark")] " ") # select multile columns
result
df = df[['Fee','Duration']].isnull().sum()
==df.astype({"Fee": int, "Discount": float},errors='ignore')
df2 df.loc[:, ["Courses","Fee","Discount"]]
df[df['Courses'].str.lower().str.contains("spark")]
import # Select Random columns
count
counts =pandas
df[['Fee',
as pd
=df.isnull().sum().sum()
movies.genre.value_counts()
'Discount']] =df [['Fee', 'Discount']].apply(pd.to_numeric)
df2 = df.loc[:,'Fee':'Discount']
df[df['Courses'].str.startswith("P")]
technologies = { # Select columns between two columns
counts.nlargest(3)
df['Fee'] = pd.to_numeric(df['Fee'])
df2 'Courses':["Spark","PySpark","Python","pandas"],
= df.iloc[:,[1,3,4]] # Select columns by Index
df
df2 ==df.astype(str)
df.iloc[:,1:4]
df.apply(lambda
'Fee' row: # Select between indexes 1 and 4 (2,3,4)
row[df['Courses'].isin(['Spark','PySpark'])])
:[20000,25000,22000,30000],
df = df.astype({"Fee": int, "Discount": float})
df.dropna()
df['A'] = df['A'].apply(np.square)
'Duration':['30days','40days','35days','50days'],
rows_count
df["B"] = len(df.index)
= df["B"].apply(add_4)
}
df2 = df[ (df['Fee']
pd.to_numeric(df.col_three, >= 22000) & (df['Discount'] == 2300)]
errors='coerce').fillna(0)
rows_count
df[['A','B']] = len(df.axes[0])
= df[['A','B']].apply(add_3)
index_labels=['r1','r2','r3','r4']
df = df.reindex(['Courses','Duration','Fee','Discount'],
df = df.apply(pd.to_numeric,
technologies = ({ errors='coerce').fillna(0) axis=1)
rows_count
df["A"]
df1
def = = df.shape[0]
= df["A"].apply(lambda x: x-2)
pd.DataFrame(technologies,index=index_labels)
summary(df):
df2 = df[['Discount',"Fee","Courses","Duration"]]
'Courses':["Spark","PySpark","Hadoop","Python","Pandas","Hadoop","Spark","Python","NA"],
movies_df[(movies_df['director'] == 'Christopher Nolan') | (movies_df['director']
rows_count = df.count()[0]
movies_df["rating"].apply(lambda x:col'good' if x >= 8.0
df2 = df[ :[22000,25000,23000,24000,26000,25000,25000,22000,1500],
== 'Fee'
'Ridley ['Duration']
Scott')].head() + [ col for in df.columns ifelse
col 'bad')
!= 'Duration']]
df['month'].apply(lambda
technologies2
types = df.dtypes= { x: month_labels[x]) - month_labels is a dict
'Duration':['30days','50days','55days','40days','60days','35days','30days','50days','40day
movies_df[movies_df['director'].isin(['Christopher Nolan', 'Ridley Scott'])].head()
'Courses':["Spark","Java","Python","Go"],
counts = df.apply(lambda x: x.count())
'Discount':[1000,2300,1000,1200,2500,None,1400,1600,0]
movies_df[
'Discount':[2000,2300,1200,2000]
uniques = df.apply(lambda
df3=pd.merge(df1,df2, x: [x.unique()])
on='Courses')
})
((movies_df['year'] >= 2005) & (movies_df['year'] <= 2010))
df3 &nas = }
df.apply(lambda
= (movies_df['rating']
pd.merge(df3, df1, how='left', x: x.isnull().sum())
left_on=['Col1','col2'], right_on = ['col1','col2'])
https://datagy.io/pandas-groupby/df2 > 8.0) = df.groupby(['Courses','Duration']).sum().reset_index()g
index_labels2=['r1','r6','r3','r5']
distincts = df.apply(lambda
df3=pd.merge(df1,df2, on='Courses', x: x.unique().shape[0])
how='right')
& (movies_df['revenue_millions'] < movies_df['revenue_millions'].quantile(0.25))
df2 = pd.DataFrame(technologies2,index=index_labels2)
df['Months']
missing ==(df.isnull().sum()
df3=pd.merge(df1,df2, df['Date'].dt.month
on='Courses', / df.shape[0])
how='outer')* 100
]
df['Weekday']
def sk = =
df.skew()
df3=pd.merge(df1,df2,
my_agg(x): df['Date'].dt.weekday
on='Courses', how='left')
df['Year']
krt = =
names ={df['Date'].dt.year
df.kurt()
mean_income
df['Weekday = df['income'].mean()
Name'] = x['Price'].mean(),
df['Date'].dt.weekday_name
'PriceMean':
df['higher_than_avg_income']
df['date'].dt.month_name()
print('Data shape:', = df['income'].map(lambda x: x > mean_income)
df.shape)
'VolumeMax': x['Volume'].max(),
Date is 'DailyRevMean':
in ccyy-mm-dd x['Daily Revenue'].mean(),
cols = ['Type', 'Total
https://datagy.io/pandas-cut-qcut/
'DailyRevMax': count',
x['Daily 'Null Values', 'Distinct Values', 'Missing Ratio', 'Unique
Revenue'].max()
import pandas as pd
dtls
} = pd.concat([types, counts, nas, distincts, missing, uniques, sk, krt], axis=1, sort=
https://github.com/softhints/Pandas-Tutorials
laptops = pd.read_csv('laptops.csv', encoding='Latin-1')
import calendar
def clean_col(col):
print(x, ":", calendar.month_abbr[x], "-", calendar.month_name[x])
dtls.columns
return = cols
pd.Series(names, index=[ key for key in names.keys()])
col = col.strip()
return dtls
col = col.replace("(",'')
df.groupby('Store
drinks.loc[::-1].head() ID').apply(my_agg).head(10)
col = col.replace(")",'') - Rows
drinks.loc[:, ::-1].head()
col==movies.genre.value_counts()
counts col.lower() - Columns
return col
movies[movies.genre.isin(counts.nlargest(3).index)].head()
format_dict = {'Date':'{:%m/%d/%y}', 'Close':'${:.2f}', 'Volume':'{:,}'}
new_columns = []
stocks.style.format(format_dict)
for c in laptops.columns:
new_columns.append(clean_col(c))
laptops.columns = new_columns
Notes
ubusercontent.com/datagy/data/main/sample_dates.csv')pd.read_excel('https://github.com/datagy/pivot_tab

roupby('Courses',sort=False).sum()sortedDF=groupedDF.sort_values('Courses', ascending=False)df.groupby(

ess', 'Kurtosis']
.com/datagy/pivot_table_pandas/raw/master/sample_pivot.xlsx', parse_dates=['Date'])pd.read_csv("https:/

ing=False)df.groupby('region').ngroupsprint(df.groupby('region').groups) - dict key,value pairsprint(df


)pd.read_csv("https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv")pd.read_json("htt

y,value pairsprint(df.groupby('region').groups.keys())print(df.groupby('region').get_group('South')) -
sv")pd.read_json("https://data.cityofchicago.org/resource/tq3e-t5yq.json")"https://raw.githubuserconten

et_group('South')) - Select a groupsums = df.groupby(['region', 'gender']).sum()Aggregating:averages =


/raw.githubusercontent.com/cmdlinetips/data/master/gapminder-FiveYearData.csv"http://archive.ics.uci.ed

gregating:averages = df.groupby('region')['sales'].mean() -- Also .sum(), .mean(), .max()import numpy a


://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.datahttps://raw.githubusercontent.com/fi

.max()import numpy as npaggs = df.groupby('region')['sales'].agg([np.mean, np.std, np.var])Rank:df['ra


hubusercontent.com/fivethirtyeight/data/master/airline-safety/airline-safety.csvurl = 'https://github.c

, np.var])Rank:df['rank'] = df.groupby(['region', 'gender'])['sales'].rank(ascending=False)


l = 'https://github.com/chris1610/pbpython/blob/master/data/2018_Sales_Total_v2.xlsx?raw=True'drinks =
sx?raw=True'drinks = pd.read_csv('http://bit.ly/drinksbycountry')movies = pd.read_csv('http://bit.ly/im
csv('http://bit.ly/imdbratings')orders = pd.read_csv('http://bit.ly/chiporders', sep='\t')orders['item_
ep='\t')orders['item_price'] = orders.item_price.str.replace('$', '').astype('float')stocks = pd.read_c
')stocks = pd.read_csv('http://bit.ly/smallstocks', parse_dates=['Date'])titanic = pd.read_csv('http://
pd.read_csv('http://bit.ly/kaggletrain')ufo = pd.read_csv('http://bit.ly/uforeports', parse_dates=['Ti
ts', parse_dates=['Time'])bit.ly/chipordersbit.ly/drinksbycountrybit.ly/imdbratingsbit.ly/smallstocksbi
gsbit.ly/smallstocksbit.ly/kaggletestbit.ly/kaggletrainbit.ly/movielensdatabit.ly/movieitemsbit.ly/movi
movieitemsbit.ly/movieusersbit.ly/uforeports
df3=pd.merge(df1,df2, on='Courses', how='right')
print(df3)

# Merge by outer join


df3=pd.merge(df1,df2, on='Courses', how='outer')
MERGE print(df3)

https://www.studytonight.com/python/property-in-python

You might also like