Data Manipulation
Data wrangling
Print some words that start with a particular string (e.g. words that start with 'phy')
# If file is not specified, the file /usr/share/dict/words is used.
look phy|head -n 10
# Phil
# Philadelphia
# Philadelphia's
# Philby
# Philby's
# Philip
# Philippe
# Philippe's
# Philippians
# Philippine

Repeat printing string n times (e.g. print 'hello world' five times)
printf 'hello world\n%.0s' {1..5}

Do not echo the trailing newline
username=`echo -n "bashoneliner"`

Copy a file to multiple files (e.g. copy fileA to file(B-D))
tee <fileA fileB fileC fileD >/dev/null

Delete all non-printing characters
tr -dc '[:print:]' < filename

Remove newline / nextline
tr --delete '\n' <input.txt >output.txt

Replace newline
tr '\n' ' ' <filename

To uppercase/lowercase
tr /a-z/ /A-Z/

Translate a range of characters (e.g. substitute a-z into a)
echo 'something' |tr a-z a
# aaaaaaaaa

Compare two files (e.g. fileA, fileB)
diff fileA fileB
# a: added; d:delete; c:changed
# or
sdiff fileA fileB
# side-by-side merge of file differences

Compare two files, strip trailing carriage return/ nextline (e.g. fileA, fileB)
 diff fileA fileB --strip-trailing-cr

Number a file (e.g. fileA)
nl fileA
#or
nl -nrz fileA
# add leading zeros
#or
nl -w1 -s ' '
# keep it simple, blank-separated

Join two files field by field with tab (default join by the first column of both files, and default separator is space)
# fileA and fileB should have the same ordering of lines.
join -t '\t' fileA fileB
# Join using specified field (e.g. column 3 of fileA and column 5 of fileB)
join -1 3 -2 5 fileA fileB

Combine/ paste two or more files into columns (e.g. fileA, fileB, fileC)
paste fileA fileB fileC
# default separator is tab

Group/combine rows into one row
# e.g.
# AAAA
# BBBB
# CCCC
# DDDD
cat filename|paste - -
# AAAABBBB
# CCCCDDDD
cat filename|paste - - - -
# AAAABBBBCCCCDDDD

Fastq to fasta (fastq and fasta are common file formats for bioinformatics sequence data)
cat file.fastq | paste - - - - | sed 's/^@/>/g'| cut -f1-2 | tr '\t' '\n' >file.fa

Reverse string
echo 12345| rev

Generate sequence 1-10
seq 10

Find average of input list/file of integers
i=`wc -l filename|cut -d ' ' -f1`; cat filename| echo "scale=2;(`paste -sd+`)/"$i|bc

Generate all combinations (e.g. 1,2)
echo {1,2}{1,2}
# 11 12 21 22

Generate all combinations (e.g. A,T,C,G)
set = {A,T,C,G}
group= 5
for ((i=0; i<$group; i++));do
    repetition=$set$repetition;done
    bash -c "echo "$repetition""

Read file content to variable
foo=$(<test1)

Echo size of variable
echo ${#foo}

Echo a tab
echo -e ' \t '

Split file into smaller files
# Split by line (e.g. 1000 lines/smallfile)
split -d -l 1000 largefile.txt
# Split by byte without breaking lines across files
split -C 10 largefile.txt

Create a large number of dummy files (e.g. 100000 files, 10 bytes each):
#1. Create a big file
dd if=/dev/zero of=bigfile bs=1 count=1000000
#2. Split the big file to 100000 10-bytes files
 split -b 10 -a 10 bigfile

Rename all files (e.g. remove ABC from all .gz files)
rename 's/ABC//' *.gz

Remove file extension (e.g. remove .gz from filename.gz)
basename filename.gz .gz
zcat filename.gz> $(basename filename.gz .gz).unpacked

Add file extension to all files (e.g. add .txt)
rename s/$/.txt/ *
# You can use rename -n s/$/.txt/ * to check the result first, it will only print sth like this:
# rename(a, a.txt)
# rename(b, b.txt)
# rename(c, c.txt)

Squeeze repeat patterns (e.g. /t/t --> /t)
tr -s "/t" < filename

Do not print nextline with echo
echo -e 'text here \c'

View first 50 characters of file
head -c 50 file

Cut and get last column of a file
cat file|rev | cut -d/ -f1 | rev

Add one to variable/increment/ i++ a numeric variable (e.g. $var)
((var++))
# or
var=$((var+1))

Cut the last column
cat filename|rev|cut -f1|rev

Cat to a file
cat >myfile
let me add sth here
exit by control + c
^C

Clear the contents of a file (e.g. filename)
>filename

Append to file (e.g. hihi)
echo 'hihi' >>filename

Working with JSON data
#install the useful jq package
#sudo apt-get install jq
#e.g. to get all the values of the 'url' key, simply pipe the json to the following jq command(you can use .[]. to select inner json, i.e jq '.[].url')
cat file.json | jq '.url'

Decimal to Binary (e.g. get binary of 5)
D2B=({0..1}{0..1}{0..1}{0..1}{0..1}{0..1}{0..1}{0..1})
echo -e ${D2B[5]}
#00000101
echo -e ${D2B[255]}
#11111111

Wrap each input line to fit in specified width (e.g. 4 integers per line)
echo "00110010101110001101" | fold -w4
# 0011
# 0010
# 1011
# 1000
# 1101

Sort a file by column and keep the original order
sort -k3,3 -s

Right align a column (right align the 2nd column)
cat file.txt|rev|column -t|rev

To both view and store the output
echo 'hihihihi' | tee outputfile.txt
# use '-a' with tee to append to file.

Show non-printing (Ctrl) characters with cat
cat -v filename

Convert tab to space
expand filename

Convert space to tab
unexpand filename

Display file in octal (you can also use od to display hexadecimal, decimal, etc)
od filename

Reverse cat a file
tac filename

Reverse the result from uniq -c
while read a b; do yes $b |head -n $a ;done <test.txt

Last updated
Was this helpful?