[PBS] Torque+Maui 스케줄러 설치
####################################################################################################
# PBS install
####################################################################################################
# 소스 파일 압축 해제
[root@orion ~]# tar xvfz torque-2.3.0.tar.gz
# configure
# 옵션은 아래 사이트 참고
# http://www.clusterresources.com/torquedocs21/1.3advconfig.shtml#configoptions
#
[root@orion torque-2.3.0]# ./configure \
--enable-server --enable-clients \
--enable-docs --enable-mom \
--prefix=/usr/local \
--with-default-server=orion \
--with-scp
# compile
[root@orion torque-2.3.0]# make
# 설치
[root@orion torque-2.3.0]# make install
# 설치 후 torque (pbs) 서버 기본 설정
[root@orion torque-2.3.0]# ./torque.setup root
initializing TORQUE (admin: root@orion.xxxx.ac.kr)
PBS_Server orion.xxxx.ac.kr: Create mode and server database exists,
do you wish to continue y/(n)?y
Max open servers: 4
Max open servers: 4
# pbs에서 사용할 노드 등록
# np=4, 각 노드에 4개의 cpu가 있음을 의미
[root@orion torque-2.3.0]# vi /var/spool/torque/server_priv/nodes
orion np=4
node01 np=4
node02 np=4
node03 np=4
# pbs_mom은 실제 job을 수행할 compute node에서 구성
# orion (master) 노드에서도 job이 수행되므로 아래 파일 설정
# 자세한 설정은 아래 링크 참조
# http://www.clusterresources.com/torquedocs21/a.cmomconfig.shtml
#
[root@orion torque-2.3.0]# vi /var/spool/torque/mom_priv/config
$pbsserver orion
$logevent 255
# pbs server 지정
# job이 수행될 모든 노드에 설정
[root@orion torque-2.3.0]# vi /var/spool/torque/server_name
orion
# rebooting 이후 자동으로 실행되도록 설정
[root@orion sbin]# echo "/usr/local/sbin/pbs_mom" >> /etc/rc.d/rc.local
[root@orion sbin]# echo "/usr/local/sbin/pbs_server" >> /etc/rc.d/rc.local
[root@orion sbin]# echo "/usr/local/sbin/pbs_sched" >> /etc/rc.d/rc.local
####################################################################################################
# install torque on compute nodes
####################################################################################################
# compute 노드에 pbs client 설치 및 설정
# 먼저 orion (master) 노드에서 패키지를 생성
[root@orion torque-2.3.0]# make packages
# 생성된 패키지를 /data1 (NFS로 공유된 디렉토리)에 복사
[root@orion torque-2.3.0]# cp *.sh /data1
# 모든 노드에서 /data1 디렉토리에 있는 파일을 이용하여 설치
# 먼저 pbs_mom 설치
[root@orion ~]# psh compute '/data1/torque-package-mom-linux-x86_64.sh --install'
node01:
node01: Installing TORQUE archive…
node01:
node01: Done.
node02:
node02: Installing TORQUE archive…
node02:
node02: Done.
node03:
node03: Installing TORQUE archive…
node03:
node03: Done.
# 같은 과정으로 client, doc, devel 설치
[root@orion ~]# psh compute '/data1/torque-package-clients-linux-x86_64.sh --install'
[root@orion ~]# psh compute '/data1/torque-package-doc-linux-x86_64.sh --install'
[root@orion ~]# psh compute '/data1/torque-package-devel-linux-x86_64.sh --install'
# rebooting 이후 자동으로 pbs_mom 이 시작되도록 설정
[root@orion ~]# psh compute 'echo "/usr/local/sbin/pbs_mom" >> /etc/rc.d/rc.local'
# jobs이 실행될 수 있도록 pbs_mom 설정 변경
[root@orion sbin]# psh all 'echo "\$pbsserver orion" > /var/spool/torque/mom_priv/config'
[root@orion sbin]# psh all 'echo "\$logevent 255" >> /var/spool/torque/mom_priv/config'
[root@orion ~]# psh compute 'echo "orion" > /var/spool/torque/server_name'
# 설정을 마친 후 pbs_mom 시작
[root@orion ~]# psh compute '/usr/local/sbin/pbs_mom'
# pbs 정상 설정여부 확인
# 모든 노드에서 free 라고 올라와야 함
# down 이라고 표시되면, pbs_mom을 재시작해야 함
[root@orion ~]# pbsnodes -a
# pbs_mom 재시작 방법
[root@orion ~]# psh compute 'killall pbs_mom'
[root@orion ~]# psh compute 'pbs_mom'
####################################################################################################
# queue 테스트
####################################################################################################
#
# 일반 유저로 로그인해서
# job submit 테스트 수행
[user1@orion ~]$ echo "sleep 5" | qsub
[user1@orion ~]$ qstat